diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 191ed5f523a74..90cfd8cedd51b 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2671,18 +2671,6 @@ def int_amdgcn_cs_chain: ], [IntrConvergent, IntrNoReturn, ImmArg>]>; -// Run a function with all the lanes enabled. Only direct calls are allowed. The -// first argument is the callee, which must have the `amdgpu_gfx_whole_wave` -// calling convention and must not be variadic. The remaining arguments to the -// callee are taken from the arguments passed to the intrinsic. Lanes that are -// inactive at the point of the call will receive poison. The return value is -// the return value of the callee for the active lanes (there is no return -// value in the inactive ones). -def int_amdgcn_call_whole_wave: - Intrinsic<[llvm_any_ty], // The return type of the callee. - [llvm_anyptr_ty, // The callee. - llvm_vararg_ty], // The arguments to the callee. - [IntrConvergent]>; //===----------------------------------------------------------------------===// // CI+ Intrinsics diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 787543df1f0f0..bbfae570e1e1a 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2556,7 +2556,6 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, getOrCreateVReg(*ConstantInt::getTrue(CI.getType()))); return true; case Intrinsic::amdgcn_cs_chain: - case Intrinsic::amdgcn_call_whole_wave: return translateCallBase(CI, MIRBuilder); case Intrinsic::fptrunc_round: { uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d5b904055e547..d0815e9f51822 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7984,43 +7984,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, HasTailCall = true; return; } - case Intrinsic::amdgcn_call_whole_wave: { - TargetLowering::ArgListTy Args; - - // The first argument is the callee. Skip it when assembling the call args. - TargetLowering::ArgListEntry Arg; - for (unsigned Idx = 1; Idx < I.arg_size(); ++Idx) { - Arg.Node = getValue(I.getArgOperand(Idx)); - Arg.Ty = I.getArgOperand(Idx)->getType(); - Arg.setAttributes(&I, Idx); - Args.push_back(Arg); - } - - SDValue ConvControlToken; - if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) { - auto *Token = Bundle->Inputs[0].get(); - ConvControlToken = getValue(Token); - } - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(getCurSDLoc()) - .setChain(getRoot()) - .setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(), - getValue(I.getArgOperand(0)), std::move(Args)) - .setTailCall(false) - .setIsPreallocated( - I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0) - .setConvergent(I.isConvergent()) - .setConvergenceControlToken(ConvControlToken); - CLI.CB = &I; - - std::pair Result = - lowerInvokable(CLI, /*EHPadBB=*/nullptr); - - if (Result.first.getNode()) - setValue(&I, Result.first); - return; - } case Intrinsic::ptrmask: { SDValue Ptr = getValue(I.getOperand(0)); SDValue Mask = getValue(I.getOperand(1)); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index f3f0ae5233977..ca3f148f881a4 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6612,36 +6612,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "Value for inactive lanes must be a VGPR function argument", &Call); break; } - case Intrinsic::amdgcn_call_whole_wave: { - auto F = dyn_cast(Call.getArgOperand(0)); - Check(F, "Indirect whole wave calls are not allowed", &Call); - - CallingConv::ID CC = F->getCallingConv(); - Check(CC == CallingConv::AMDGPU_Gfx_WholeWave, - "Callee must have the amdgpu_gfx_whole_wave calling convention", - &Call); - - Check(!F->isVarArg(), "Variadic whole wave calls are not allowed", &Call); - - Check(Call.arg_size() == F->arg_size(), - "Call argument count must match callee argument count", &Call); - - // The first argument of the call is the callee, and the first argument of - // the callee is the active mask. The rest of the arguments must match. - Check(F->arg_begin()->getType()->isIntegerTy(1), - "Callee must have i1 as its first argument", &Call); - for (auto [CallArg, FuncArg] : - drop_begin(zip_equal(Call.args(), F->args()))) { - Check(CallArg->getType() == FuncArg.getType(), - "Argument types must match", &Call); - - // Check that inreg attributes match between call site and function - Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) == - FuncArg.hasInRegAttr(), - "Argument inreg attributes must match", &Call); - } - break; - } case Intrinsic::amdgcn_s_prefetch_data: { Check( AMDGPU::isFlatGlobalAddrSpace( diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 3ff6e22fbb943..3d8d274f06246 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -1464,22 +1464,9 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const { if (Function *F = Info.CB->getCalledFunction()) if (F->isIntrinsic()) { - switch (F->getIntrinsicID()) { - case Intrinsic::amdgcn_cs_chain: - return lowerChainCall(MIRBuilder, Info); - case Intrinsic::amdgcn_call_whole_wave: - Info.CallConv = CallingConv::AMDGPU_Gfx_WholeWave; - - // Get the callee from the original instruction, so it doesn't look like - // this is an indirect call. - Info.Callee = MachineOperand::CreateGA( - cast(Info.CB->getOperand(0)), /*Offset=*/0); - Info.OrigArgs.erase(Info.OrigArgs.begin()); - Info.IsVarArg = false; - break; - default: - llvm_unreachable("Unexpected intrinsic call"); - } + assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain && + "Unexpected intrinsic"); + return lowerChainCall(MIRBuilder, Info); } if (Info.IsVarArg) { diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll deleted file mode 100644 index eac0767c88d80..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll +++ /dev/null @@ -1,174 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=DAGISEL -; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=GISEL - -declare amdgpu_gfx_whole_wave i32 @good_callee(i1 %active, i32 %x, i32 %y, i32 inreg %c) - -define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr) { -; DAGISEL-LABEL: basic_test: -; DAGISEL: ; %bb.0: -; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; DAGISEL-NEXT: s_wait_expcnt 0x0 -; DAGISEL-NEXT: s_wait_samplecnt 0x0 -; DAGISEL-NEXT: s_wait_bvhcnt 0x0 -; DAGISEL-NEXT: s_wait_kmcnt 0x0 -; DAGISEL-NEXT: s_mov_b32 s0, s33 -; DAGISEL-NEXT: s_mov_b32 s33, s32 -; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1 -; DAGISEL-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill -; DAGISEL-NEXT: s_wait_alu 0xfffe -; DAGISEL-NEXT: s_mov_b32 exec_lo, s1 -; DAGISEL-NEXT: v_writelane_b32 v42, s0, 2 -; DAGISEL-NEXT: s_clause 0x1 -; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 offset:4 -; DAGISEL-NEXT: scratch_store_b32 off, v41, s33 -; DAGISEL-NEXT: v_dual_mov_b32 v41, v2 :: v_dual_mov_b32 v40, v1 -; DAGISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 -; DAGISEL-NEXT: v_writelane_b32 v42, s30, 0 -; DAGISEL-NEXT: s_mov_b32 s1, good_callee@abs32@hi -; DAGISEL-NEXT: s_mov_b32 s0, good_callee@abs32@lo -; DAGISEL-NEXT: s_add_co_i32 s32, s32, 16 -; DAGISEL-NEXT: v_writelane_b32 v42, s31, 1 -; DAGISEL-NEXT: s_wait_alu 0xfffe -; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL-NEXT: global_store_b32 v[40:41], v0, off -; DAGISEL-NEXT: s_clause 0x1 -; DAGISEL-NEXT: scratch_load_b32 v41, off, s33 -; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; DAGISEL-NEXT: v_readlane_b32 s31, v42, 1 -; DAGISEL-NEXT: v_readlane_b32 s30, v42, 0 -; DAGISEL-NEXT: s_mov_b32 s32, s33 -; DAGISEL-NEXT: v_readlane_b32 s0, v42, 2 -; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1 -; DAGISEL-NEXT: scratch_load_b32 v42, off, s33 offset:8 ; 4-byte Folded Reload -; DAGISEL-NEXT: s_wait_alu 0xfffe -; DAGISEL-NEXT: s_mov_b32 exec_lo, s1 -; DAGISEL-NEXT: s_mov_b32 s33, s0 -; DAGISEL-NEXT: s_wait_loadcnt 0x0 -; DAGISEL-NEXT: s_wait_alu 0xfffe -; DAGISEL-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: basic_test: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GISEL-NEXT: s_wait_expcnt 0x0 -; GISEL-NEXT: s_wait_samplecnt 0x0 -; GISEL-NEXT: s_wait_bvhcnt 0x0 -; GISEL-NEXT: s_wait_kmcnt 0x0 -; GISEL-NEXT: s_mov_b32 s0, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b32 s1, -1 -; GISEL-NEXT: scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill -; GISEL-NEXT: s_wait_alu 0xfffe -; GISEL-NEXT: s_mov_b32 exec_lo, s1 -; GISEL-NEXT: v_writelane_b32 v42, s0, 2 -; GISEL-NEXT: s_clause 0x1 -; GISEL-NEXT: scratch_store_b32 off, v40, s33 offset:4 -; GISEL-NEXT: scratch_store_b32 off, v41, s33 -; GISEL-NEXT: v_dual_mov_b32 v40, v1 :: v_dual_mov_b32 v41, v2 -; GISEL-NEXT: v_add_nc_u32_e32 v1, 13, v0 -; GISEL-NEXT: v_writelane_b32 v42, s30, 0 -; GISEL-NEXT: s_mov_b32 s0, good_callee@abs32@lo -; GISEL-NEXT: s_mov_b32 s1, good_callee@abs32@hi -; GISEL-NEXT: s_add_co_i32 s32, s32, 16 -; GISEL-NEXT: v_writelane_b32 v42, s31, 1 -; GISEL-NEXT: s_wait_alu 0xfffe -; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL-NEXT: global_store_b32 v[40:41], v0, off -; GISEL-NEXT: s_clause 0x1 -; GISEL-NEXT: scratch_load_b32 v41, off, s33 -; GISEL-NEXT: scratch_load_b32 v40, off, s33 offset:4 -; GISEL-NEXT: v_readlane_b32 s31, v42, 1 -; GISEL-NEXT: v_readlane_b32 s30, v42, 0 -; GISEL-NEXT: s_mov_b32 s32, s33 -; GISEL-NEXT: v_readlane_b32 s0, v42, 2 -; GISEL-NEXT: s_or_saveexec_b32 s1, -1 -; GISEL-NEXT: scratch_load_b32 v42, off, s33 offset:8 ; 4-byte Folded Reload -; GISEL-NEXT: s_wait_alu 0xfffe -; GISEL-NEXT: s_mov_b32 exec_lo, s1 -; GISEL-NEXT: s_mov_b32 s33, s0 -; GISEL-NEXT: s_wait_loadcnt 0x0 -; GISEL-NEXT: s_wait_alu 0xfffe -; GISEL-NEXT: s_setpc_b64 s[30:31] - %y = add i32 %x, 13 - %ret = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x, i32 %y, i32 inreg %c) - store i32 %ret, ptr addrspace(1) %ptr - ret void -} - -declare amdgpu_gfx_whole_wave void @void_callee(i1 %active, i32 %x) - -define amdgpu_gfx void @ret_void(i32 %x) { -; DAGISEL-LABEL: ret_void: -; DAGISEL: ; %bb.0: -; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; DAGISEL-NEXT: s_wait_expcnt 0x0 -; DAGISEL-NEXT: s_wait_samplecnt 0x0 -; DAGISEL-NEXT: s_wait_bvhcnt 0x0 -; DAGISEL-NEXT: s_wait_kmcnt 0x0 -; DAGISEL-NEXT: s_mov_b32 s0, s33 -; DAGISEL-NEXT: s_mov_b32 s33, s32 -; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1 -; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; DAGISEL-NEXT: s_wait_alu 0xfffe -; DAGISEL-NEXT: s_mov_b32 exec_lo, s1 -; DAGISEL-NEXT: v_writelane_b32 v40, s0, 2 -; DAGISEL-NEXT: s_mov_b32 s1, void_callee@abs32@hi -; DAGISEL-NEXT: s_mov_b32 s0, void_callee@abs32@lo -; DAGISEL-NEXT: s_add_co_i32 s32, s32, 16 -; DAGISEL-NEXT: v_writelane_b32 v40, s30, 0 -; DAGISEL-NEXT: v_writelane_b32 v40, s31, 1 -; DAGISEL-NEXT: s_wait_alu 0xfffe -; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; DAGISEL-NEXT: v_readlane_b32 s31, v40, 1 -; DAGISEL-NEXT: v_readlane_b32 s30, v40, 0 -; DAGISEL-NEXT: s_mov_b32 s32, s33 -; DAGISEL-NEXT: v_readlane_b32 s0, v40, 2 -; DAGISEL-NEXT: s_or_saveexec_b32 s1, -1 -; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; DAGISEL-NEXT: s_wait_alu 0xfffe -; DAGISEL-NEXT: s_mov_b32 exec_lo, s1 -; DAGISEL-NEXT: s_mov_b32 s33, s0 -; DAGISEL-NEXT: s_wait_loadcnt 0x0 -; DAGISEL-NEXT: s_wait_alu 0xfffe -; DAGISEL-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: ret_void: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GISEL-NEXT: s_wait_expcnt 0x0 -; GISEL-NEXT: s_wait_samplecnt 0x0 -; GISEL-NEXT: s_wait_bvhcnt 0x0 -; GISEL-NEXT: s_wait_kmcnt 0x0 -; GISEL-NEXT: s_mov_b32 s0, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b32 s1, -1 -; GISEL-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_wait_alu 0xfffe -; GISEL-NEXT: s_mov_b32 exec_lo, s1 -; GISEL-NEXT: v_writelane_b32 v40, s0, 2 -; GISEL-NEXT: s_mov_b32 s0, void_callee@abs32@lo -; GISEL-NEXT: s_mov_b32 s1, void_callee@abs32@hi -; GISEL-NEXT: s_add_co_i32 s32, s32, 16 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: s_wait_alu 0xfffe -; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: s_mov_b32 s32, s33 -; GISEL-NEXT: v_readlane_b32 s0, v40, 2 -; GISEL-NEXT: s_or_saveexec_b32 s1, -1 -; GISEL-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_wait_alu 0xfffe -; GISEL-NEXT: s_mov_b32 exec_lo, s1 -; GISEL-NEXT: s_mov_b32 s33, s0 -; GISEL-NEXT: s_wait_loadcnt 0x0 -; GISEL-NEXT: s_wait_alu 0xfffe -; GISEL-NEXT: s_setpc_b64 s[30:31] - call void(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @void_callee, i32 %x) - ret void -} - diff --git a/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll index 17c8010bcbe05..8fc5afb155573 100644 --- a/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll +++ b/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll @@ -101,29 +101,3 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { %ret = call i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %y, i32 1, i32 1, i32 1, i1 false) ret i64 %ret } - -declare amdgpu_gfx_whole_wave i32 @callee(i1 %active, i32 %x) - -; Make sure we don't pass the first argument (i1). -define amdgpu_cs void @call(i32 %x, ptr %p) { - ; CHECK-LABEL: name: call - ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV1]](p0), @callee, csr_amdgpu_si_gfx, implicit $vgpr0, implicit-def $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[MV]](p0) :: (store (s32) into %ir.p) - ; CHECK-NEXT: S_ENDPGM 0 - %ret = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, i32 %x) convergent - store i32 %ret, ptr %p - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/isel-whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/isel-whole-wave-functions.ll index 69809b115e037..3450d63ff7b4a 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-whole-wave-functions.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-whole-wave-functions.ll @@ -189,79 +189,3 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) { ret i64 %ret } -declare amdgpu_gfx_whole_wave i32 @callee(i1 %active, <8 x i32> %x) - -; Make sure we don't pass the first argument (i1). -define amdgpu_cs void @call(<8 x i32> %x, ptr %p) { - ; DAGISEL-LABEL: name: call - ; DAGISEL: bb.0 (%ir-block.0): - ; DAGISEL-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 - ; DAGISEL-NEXT: {{ $}} - ; DAGISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr9 - ; DAGISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; DAGISEL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr7 - ; DAGISEL-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; DAGISEL-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; DAGISEL-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; DAGISEL-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; DAGISEL-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; DAGISEL-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; DAGISEL-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; DAGISEL-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; DAGISEL-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; DAGISEL-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; DAGISEL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee - ; DAGISEL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee - ; DAGISEL-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 - ; DAGISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-NEXT: $vgpr0 = COPY [[COPY9]] - ; DAGISEL-NEXT: $vgpr1 = COPY [[COPY8]] - ; DAGISEL-NEXT: $vgpr2 = COPY [[COPY7]] - ; DAGISEL-NEXT: $vgpr3 = COPY [[COPY6]] - ; DAGISEL-NEXT: $vgpr4 = COPY [[COPY5]] - ; DAGISEL-NEXT: $vgpr5 = COPY [[COPY4]] - ; DAGISEL-NEXT: $vgpr6 = COPY [[COPY3]] - ; DAGISEL-NEXT: $vgpr7 = COPY [[COPY2]] - ; DAGISEL-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], @callee, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit-def $vgpr0 - ; DAGISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; DAGISEL-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; DAGISEL-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; DAGISEL-NEXT: FLAT_STORE_DWORD killed [[COPY11]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p) - ; DAGISEL-NEXT: S_ENDPGM 0 - ; - ; GISEL-LABEL: name: call - ; GISEL: bb.1 (%ir-block.0): - ; GISEL-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 - ; GISEL-NEXT: {{ $}} - ; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GISEL-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GISEL-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GISEL-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; GISEL-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6 - ; GISEL-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7 - ; GISEL-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; GISEL-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr9 - ; GISEL-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; GISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 - ; GISEL-NEXT: $vgpr0 = COPY [[COPY]] - ; GISEL-NEXT: $vgpr1 = COPY [[COPY1]] - ; GISEL-NEXT: $vgpr2 = COPY [[COPY2]] - ; GISEL-NEXT: $vgpr3 = COPY [[COPY3]] - ; GISEL-NEXT: $vgpr4 = COPY [[COPY4]] - ; GISEL-NEXT: $vgpr5 = COPY [[COPY5]] - ; GISEL-NEXT: $vgpr6 = COPY [[COPY6]] - ; GISEL-NEXT: $vgpr7 = COPY [[COPY7]] - ; GISEL-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee - ; GISEL-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee - ; GISEL-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-NEXT: $sgpr30_sgpr31 = SI_CALL [[REG_SEQUENCE1]], @callee, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit-def $vgpr0 - ; GISEL-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GISEL-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32 - ; GISEL-NEXT: FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p) - ; GISEL-NEXT: S_ENDPGM 0 - %ret = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, <8 x i32> %x) convergent - store i32 %ret, ptr %p - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll index 36e8adb23f1f5..a13a68a665aee 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll @@ -2412,1427 +2412,3 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2 %ret = call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent ret <2 x half> %ret } - -declare amdgpu_gfx_whole_wave float @callee(i1 %active, <8 x float> %x) - -define amdgpu_cs void @call_from_entry(<8 x float> %x, ptr %p) { -; DAGISEL-LABEL: call_from_entry: -; DAGISEL: ; %bb.0: -; DAGISEL-NEXT: s_mov_b32 s1, callee@abs32@hi -; DAGISEL-NEXT: s_mov_b32 s0, callee@abs32@lo -; DAGISEL-NEXT: s_mov_b32 s32, 0 -; DAGISEL-NEXT: v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8 -; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL-NEXT: flat_store_b32 v[40:41], v0 -; DAGISEL-NEXT: s_endpgm -; -; GISEL-LABEL: call_from_entry: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo -; GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi -; GISEL-NEXT: s_mov_b32 s32, 0 -; GISEL-NEXT: v_dual_mov_b32 v40, v8 :: v_dual_mov_b32 v41, v9 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL-NEXT: flat_store_b32 v[40:41], v0 -; GISEL-NEXT: s_endpgm -; -; DAGISEL64-LABEL: call_from_entry: -; DAGISEL64: ; %bb.0: -; DAGISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi -; DAGISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo -; DAGISEL64-NEXT: s_mov_b32 s32, 0 -; DAGISEL64-NEXT: v_mov_b32_e32 v41, v9 -; DAGISEL64-NEXT: v_mov_b32_e32 v40, v8 -; DAGISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL64-NEXT: flat_store_b32 v[40:41], v0 -; DAGISEL64-NEXT: s_endpgm -; -; GISEL64-LABEL: call_from_entry: -; GISEL64: ; %bb.0: -; GISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo -; GISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi -; GISEL64-NEXT: s_mov_b32 s32, 0 -; GISEL64-NEXT: v_mov_b32_e32 v40, v8 -; GISEL64-NEXT: v_mov_b32_e32 v41, v9 -; GISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL64-NEXT: flat_store_b32 v[40:41], v0 -; GISEL64-NEXT: s_endpgm - %ret = call float(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, <8 x float> %x) convergent - store float %ret, ptr %p - ret void -} - -define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> %x, ptr %p) { -; DAGISEL-LABEL: call_from_whole_wave: -; DAGISEL: ; %bb.0: -; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; DAGISEL-NEXT: s_wait_expcnt 0x0 -; DAGISEL-NEXT: s_wait_samplecnt 0x0 -; DAGISEL-NEXT: s_wait_bvhcnt 0x0 -; DAGISEL-NEXT: s_wait_kmcnt 0x0 -; DAGISEL-NEXT: s_mov_b32 s0, s33 -; DAGISEL-NEXT: s_mov_b32 s33, s32 -; DAGISEL-NEXT: s_xor_saveexec_b32 s4, -1 -; DAGISEL-NEXT: s_clause 0x1f -; DAGISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 -; DAGISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 -; DAGISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 -; DAGISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 -; DAGISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 -; DAGISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 -; DAGISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 -; DAGISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 -; DAGISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 -; DAGISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 -; DAGISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 -; DAGISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 -; DAGISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 -; DAGISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 -; DAGISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 -; DAGISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 -; DAGISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 -; DAGISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 -; DAGISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 -; DAGISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 -; DAGISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 -; DAGISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 -; DAGISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 -; DAGISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 -; DAGISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 -; DAGISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 -; DAGISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 -; DAGISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 -; DAGISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 -; DAGISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 -; DAGISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 -; DAGISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 -; DAGISEL-NEXT: s_clause 0x1f -; DAGISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 -; DAGISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 -; DAGISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 -; DAGISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 -; DAGISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 -; DAGISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 -; DAGISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 -; DAGISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 -; DAGISEL-NEXT: scratch_store_b32 off, v48, s33 offset:172 -; DAGISEL-NEXT: scratch_store_b32 off, v49, s33 offset:176 -; DAGISEL-NEXT: scratch_store_b32 off, v50, s33 offset:180 -; DAGISEL-NEXT: scratch_store_b32 off, v51, s33 offset:184 -; DAGISEL-NEXT: scratch_store_b32 off, v52, s33 offset:188 -; DAGISEL-NEXT: scratch_store_b32 off, v53, s33 offset:192 -; DAGISEL-NEXT: scratch_store_b32 off, v54, s33 offset:196 -; DAGISEL-NEXT: scratch_store_b32 off, v55, s33 offset:200 -; DAGISEL-NEXT: scratch_store_b32 off, v64, s33 offset:204 -; DAGISEL-NEXT: scratch_store_b32 off, v65, s33 offset:208 -; DAGISEL-NEXT: scratch_store_b32 off, v66, s33 offset:212 -; DAGISEL-NEXT: scratch_store_b32 off, v67, s33 offset:216 -; DAGISEL-NEXT: scratch_store_b32 off, v68, s33 offset:220 -; DAGISEL-NEXT: scratch_store_b32 off, v69, s33 offset:224 -; DAGISEL-NEXT: scratch_store_b32 off, v70, s33 offset:228 -; DAGISEL-NEXT: scratch_store_b32 off, v71, s33 offset:232 -; DAGISEL-NEXT: scratch_store_b32 off, v80, s33 offset:236 -; DAGISEL-NEXT: scratch_store_b32 off, v81, s33 offset:240 -; DAGISEL-NEXT: scratch_store_b32 off, v82, s33 offset:244 -; DAGISEL-NEXT: scratch_store_b32 off, v83, s33 offset:248 -; DAGISEL-NEXT: scratch_store_b32 off, v84, s33 offset:252 -; DAGISEL-NEXT: scratch_store_b32 off, v85, s33 offset:256 -; DAGISEL-NEXT: scratch_store_b32 off, v86, s33 offset:260 -; DAGISEL-NEXT: scratch_store_b32 off, v87, s33 offset:264 -; DAGISEL-NEXT: s_clause 0x1f -; DAGISEL-NEXT: scratch_store_b32 off, v96, s33 offset:268 -; DAGISEL-NEXT: scratch_store_b32 off, v97, s33 offset:272 -; DAGISEL-NEXT: scratch_store_b32 off, v98, s33 offset:276 -; DAGISEL-NEXT: scratch_store_b32 off, v99, s33 offset:280 -; DAGISEL-NEXT: scratch_store_b32 off, v100, s33 offset:284 -; DAGISEL-NEXT: scratch_store_b32 off, v101, s33 offset:288 -; DAGISEL-NEXT: scratch_store_b32 off, v102, s33 offset:292 -; DAGISEL-NEXT: scratch_store_b32 off, v103, s33 offset:296 -; DAGISEL-NEXT: scratch_store_b32 off, v112, s33 offset:300 -; DAGISEL-NEXT: scratch_store_b32 off, v113, s33 offset:304 -; DAGISEL-NEXT: scratch_store_b32 off, v114, s33 offset:308 -; DAGISEL-NEXT: scratch_store_b32 off, v115, s33 offset:312 -; DAGISEL-NEXT: scratch_store_b32 off, v116, s33 offset:316 -; DAGISEL-NEXT: scratch_store_b32 off, v117, s33 offset:320 -; DAGISEL-NEXT: scratch_store_b32 off, v118, s33 offset:324 -; DAGISEL-NEXT: scratch_store_b32 off, v119, s33 offset:328 -; DAGISEL-NEXT: scratch_store_b32 off, v128, s33 offset:332 -; DAGISEL-NEXT: scratch_store_b32 off, v129, s33 offset:336 -; DAGISEL-NEXT: scratch_store_b32 off, v130, s33 offset:340 -; DAGISEL-NEXT: scratch_store_b32 off, v131, s33 offset:344 -; DAGISEL-NEXT: scratch_store_b32 off, v132, s33 offset:348 -; DAGISEL-NEXT: scratch_store_b32 off, v133, s33 offset:352 -; DAGISEL-NEXT: scratch_store_b32 off, v134, s33 offset:356 -; DAGISEL-NEXT: scratch_store_b32 off, v135, s33 offset:360 -; DAGISEL-NEXT: scratch_store_b32 off, v144, s33 offset:364 -; DAGISEL-NEXT: scratch_store_b32 off, v145, s33 offset:368 -; DAGISEL-NEXT: scratch_store_b32 off, v146, s33 offset:372 -; DAGISEL-NEXT: scratch_store_b32 off, v147, s33 offset:376 -; DAGISEL-NEXT: scratch_store_b32 off, v148, s33 offset:380 -; DAGISEL-NEXT: scratch_store_b32 off, v149, s33 offset:384 -; DAGISEL-NEXT: scratch_store_b32 off, v150, s33 offset:388 -; DAGISEL-NEXT: scratch_store_b32 off, v151, s33 offset:392 -; DAGISEL-NEXT: s_clause 0x1f -; DAGISEL-NEXT: scratch_store_b32 off, v160, s33 offset:396 -; DAGISEL-NEXT: scratch_store_b32 off, v161, s33 offset:400 -; DAGISEL-NEXT: scratch_store_b32 off, v162, s33 offset:404 -; DAGISEL-NEXT: scratch_store_b32 off, v163, s33 offset:408 -; DAGISEL-NEXT: scratch_store_b32 off, v164, s33 offset:412 -; DAGISEL-NEXT: scratch_store_b32 off, v165, s33 offset:416 -; DAGISEL-NEXT: scratch_store_b32 off, v166, s33 offset:420 -; DAGISEL-NEXT: scratch_store_b32 off, v167, s33 offset:424 -; DAGISEL-NEXT: scratch_store_b32 off, v176, s33 offset:428 -; DAGISEL-NEXT: scratch_store_b32 off, v177, s33 offset:432 -; DAGISEL-NEXT: scratch_store_b32 off, v178, s33 offset:436 -; DAGISEL-NEXT: scratch_store_b32 off, v179, s33 offset:440 -; DAGISEL-NEXT: scratch_store_b32 off, v180, s33 offset:444 -; DAGISEL-NEXT: scratch_store_b32 off, v181, s33 offset:448 -; DAGISEL-NEXT: scratch_store_b32 off, v182, s33 offset:452 -; DAGISEL-NEXT: scratch_store_b32 off, v183, s33 offset:456 -; DAGISEL-NEXT: scratch_store_b32 off, v192, s33 offset:460 -; DAGISEL-NEXT: scratch_store_b32 off, v193, s33 offset:464 -; DAGISEL-NEXT: scratch_store_b32 off, v194, s33 offset:468 -; DAGISEL-NEXT: scratch_store_b32 off, v195, s33 offset:472 -; DAGISEL-NEXT: scratch_store_b32 off, v196, s33 offset:476 -; DAGISEL-NEXT: scratch_store_b32 off, v197, s33 offset:480 -; DAGISEL-NEXT: scratch_store_b32 off, v198, s33 offset:484 -; DAGISEL-NEXT: scratch_store_b32 off, v199, s33 offset:488 -; DAGISEL-NEXT: scratch_store_b32 off, v208, s33 offset:492 -; DAGISEL-NEXT: scratch_store_b32 off, v209, s33 offset:496 -; DAGISEL-NEXT: scratch_store_b32 off, v210, s33 offset:500 -; DAGISEL-NEXT: scratch_store_b32 off, v211, s33 offset:504 -; DAGISEL-NEXT: scratch_store_b32 off, v212, s33 offset:508 -; DAGISEL-NEXT: scratch_store_b32 off, v213, s33 offset:512 -; DAGISEL-NEXT: scratch_store_b32 off, v214, s33 offset:516 -; DAGISEL-NEXT: scratch_store_b32 off, v215, s33 offset:520 -; DAGISEL-NEXT: s_clause 0xf -; DAGISEL-NEXT: scratch_store_b32 off, v224, s33 offset:524 -; DAGISEL-NEXT: scratch_store_b32 off, v225, s33 offset:528 -; DAGISEL-NEXT: scratch_store_b32 off, v226, s33 offset:532 -; DAGISEL-NEXT: scratch_store_b32 off, v227, s33 offset:536 -; DAGISEL-NEXT: scratch_store_b32 off, v228, s33 offset:540 -; DAGISEL-NEXT: scratch_store_b32 off, v229, s33 offset:544 -; DAGISEL-NEXT: scratch_store_b32 off, v230, s33 offset:548 -; DAGISEL-NEXT: scratch_store_b32 off, v231, s33 offset:552 -; DAGISEL-NEXT: scratch_store_b32 off, v240, s33 offset:556 -; DAGISEL-NEXT: scratch_store_b32 off, v241, s33 offset:560 -; DAGISEL-NEXT: scratch_store_b32 off, v242, s33 offset:564 -; DAGISEL-NEXT: scratch_store_b32 off, v243, s33 offset:568 -; DAGISEL-NEXT: scratch_store_b32 off, v244, s33 offset:572 -; DAGISEL-NEXT: scratch_store_b32 off, v245, s33 offset:576 -; DAGISEL-NEXT: scratch_store_b32 off, v246, s33 offset:580 -; DAGISEL-NEXT: scratch_store_b32 off, v247, s33 offset:584 -; DAGISEL-NEXT: s_mov_b32 exec_lo, -1 -; DAGISEL-NEXT: s_clause 0x2 -; DAGISEL-NEXT: scratch_store_b32 off, v42, s33 -; DAGISEL-NEXT: scratch_store_b32 off, v40, s33 offset:164 -; DAGISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168 -; DAGISEL-NEXT: s_wait_alu 0xfffe -; DAGISEL-NEXT: v_writelane_b32 v42, s0, 3 -; DAGISEL-NEXT: s_mov_b32 s1, callee@abs32@hi -; DAGISEL-NEXT: s_mov_b32 s0, callee@abs32@lo -; DAGISEL-NEXT: s_addk_co_i32 s32, 0x250 -; DAGISEL-NEXT: v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8 -; DAGISEL-NEXT: v_writelane_b32 v42, s4, 0 -; DAGISEL-NEXT: v_writelane_b32 v42, s30, 1 -; DAGISEL-NEXT: v_writelane_b32 v42, s31, 2 -; DAGISEL-NEXT: s_wait_alu 0xfffe -; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL-NEXT: flat_store_b32 v[40:41], v0 -; DAGISEL-NEXT: v_readlane_b32 s31, v42, 2 -; DAGISEL-NEXT: v_readlane_b32 s30, v42, 1 -; DAGISEL-NEXT: v_readlane_b32 s4, v42, 0 -; DAGISEL-NEXT: v_readlane_b32 s0, v42, 3 -; DAGISEL-NEXT: s_clause 0x2 -; DAGISEL-NEXT: scratch_load_b32 v42, off, s33 -; DAGISEL-NEXT: scratch_load_b32 v40, off, s33 offset:164 -; DAGISEL-NEXT: scratch_load_b32 v41, off, s33 offset:168 -; DAGISEL-NEXT: s_mov_b32 s32, s33 -; DAGISEL-NEXT: s_xor_b32 exec_lo, s4, -1 -; DAGISEL-NEXT: s_clause 0x1f -; DAGISEL-NEXT: scratch_load_b32 v0, off, s33 offset:4 -; DAGISEL-NEXT: scratch_load_b32 v1, off, s33 offset:8 -; DAGISEL-NEXT: scratch_load_b32 v2, off, s33 offset:12 -; DAGISEL-NEXT: scratch_load_b32 v3, off, s33 offset:16 -; DAGISEL-NEXT: scratch_load_b32 v4, off, s33 offset:20 -; DAGISEL-NEXT: scratch_load_b32 v5, off, s33 offset:24 -; DAGISEL-NEXT: scratch_load_b32 v6, off, s33 offset:28 -; DAGISEL-NEXT: scratch_load_b32 v7, off, s33 offset:32 -; DAGISEL-NEXT: scratch_load_b32 v8, off, s33 offset:36 -; DAGISEL-NEXT: scratch_load_b32 v9, off, s33 offset:40 -; DAGISEL-NEXT: scratch_load_b32 v10, off, s33 offset:44 -; DAGISEL-NEXT: scratch_load_b32 v11, off, s33 offset:48 -; DAGISEL-NEXT: scratch_load_b32 v12, off, s33 offset:52 -; DAGISEL-NEXT: scratch_load_b32 v13, off, s33 offset:56 -; DAGISEL-NEXT: scratch_load_b32 v14, off, s33 offset:60 -; DAGISEL-NEXT: scratch_load_b32 v15, off, s33 offset:64 -; DAGISEL-NEXT: scratch_load_b32 v16, off, s33 offset:68 -; DAGISEL-NEXT: scratch_load_b32 v17, off, s33 offset:72 -; DAGISEL-NEXT: scratch_load_b32 v18, off, s33 offset:76 -; DAGISEL-NEXT: scratch_load_b32 v19, off, s33 offset:80 -; DAGISEL-NEXT: scratch_load_b32 v20, off, s33 offset:84 -; DAGISEL-NEXT: scratch_load_b32 v21, off, s33 offset:88 -; DAGISEL-NEXT: scratch_load_b32 v22, off, s33 offset:92 -; DAGISEL-NEXT: scratch_load_b32 v23, off, s33 offset:96 -; DAGISEL-NEXT: scratch_load_b32 v24, off, s33 offset:100 -; DAGISEL-NEXT: scratch_load_b32 v25, off, s33 offset:104 -; DAGISEL-NEXT: scratch_load_b32 v26, off, s33 offset:108 -; DAGISEL-NEXT: scratch_load_b32 v27, off, s33 offset:112 -; DAGISEL-NEXT: scratch_load_b32 v28, off, s33 offset:116 -; DAGISEL-NEXT: scratch_load_b32 v29, off, s33 offset:120 -; DAGISEL-NEXT: scratch_load_b32 v30, off, s33 offset:124 -; DAGISEL-NEXT: scratch_load_b32 v31, off, s33 offset:128 -; DAGISEL-NEXT: s_clause 0x1f -; DAGISEL-NEXT: scratch_load_b32 v32, off, s33 offset:132 -; DAGISEL-NEXT: scratch_load_b32 v33, off, s33 offset:136 -; DAGISEL-NEXT: scratch_load_b32 v34, off, s33 offset:140 -; DAGISEL-NEXT: scratch_load_b32 v35, off, s33 offset:144 -; DAGISEL-NEXT: scratch_load_b32 v36, off, s33 offset:148 -; DAGISEL-NEXT: scratch_load_b32 v37, off, s33 offset:152 -; DAGISEL-NEXT: scratch_load_b32 v38, off, s33 offset:156 -; DAGISEL-NEXT: scratch_load_b32 v39, off, s33 offset:160 -; DAGISEL-NEXT: scratch_load_b32 v48, off, s33 offset:172 -; DAGISEL-NEXT: scratch_load_b32 v49, off, s33 offset:176 -; DAGISEL-NEXT: scratch_load_b32 v50, off, s33 offset:180 -; DAGISEL-NEXT: scratch_load_b32 v51, off, s33 offset:184 -; DAGISEL-NEXT: scratch_load_b32 v52, off, s33 offset:188 -; DAGISEL-NEXT: scratch_load_b32 v53, off, s33 offset:192 -; DAGISEL-NEXT: scratch_load_b32 v54, off, s33 offset:196 -; DAGISEL-NEXT: scratch_load_b32 v55, off, s33 offset:200 -; DAGISEL-NEXT: scratch_load_b32 v64, off, s33 offset:204 -; DAGISEL-NEXT: scratch_load_b32 v65, off, s33 offset:208 -; DAGISEL-NEXT: scratch_load_b32 v66, off, s33 offset:212 -; DAGISEL-NEXT: scratch_load_b32 v67, off, s33 offset:216 -; DAGISEL-NEXT: scratch_load_b32 v68, off, s33 offset:220 -; DAGISEL-NEXT: scratch_load_b32 v69, off, s33 offset:224 -; DAGISEL-NEXT: scratch_load_b32 v70, off, s33 offset:228 -; DAGISEL-NEXT: scratch_load_b32 v71, off, s33 offset:232 -; DAGISEL-NEXT: scratch_load_b32 v80, off, s33 offset:236 -; DAGISEL-NEXT: scratch_load_b32 v81, off, s33 offset:240 -; DAGISEL-NEXT: scratch_load_b32 v82, off, s33 offset:244 -; DAGISEL-NEXT: scratch_load_b32 v83, off, s33 offset:248 -; DAGISEL-NEXT: scratch_load_b32 v84, off, s33 offset:252 -; DAGISEL-NEXT: scratch_load_b32 v85, off, s33 offset:256 -; DAGISEL-NEXT: scratch_load_b32 v86, off, s33 offset:260 -; DAGISEL-NEXT: scratch_load_b32 v87, off, s33 offset:264 -; DAGISEL-NEXT: s_clause 0x1f -; DAGISEL-NEXT: scratch_load_b32 v96, off, s33 offset:268 -; DAGISEL-NEXT: scratch_load_b32 v97, off, s33 offset:272 -; DAGISEL-NEXT: scratch_load_b32 v98, off, s33 offset:276 -; DAGISEL-NEXT: scratch_load_b32 v99, off, s33 offset:280 -; DAGISEL-NEXT: scratch_load_b32 v100, off, s33 offset:284 -; DAGISEL-NEXT: scratch_load_b32 v101, off, s33 offset:288 -; DAGISEL-NEXT: scratch_load_b32 v102, off, s33 offset:292 -; DAGISEL-NEXT: scratch_load_b32 v103, off, s33 offset:296 -; DAGISEL-NEXT: scratch_load_b32 v112, off, s33 offset:300 -; DAGISEL-NEXT: scratch_load_b32 v113, off, s33 offset:304 -; DAGISEL-NEXT: scratch_load_b32 v114, off, s33 offset:308 -; DAGISEL-NEXT: scratch_load_b32 v115, off, s33 offset:312 -; DAGISEL-NEXT: scratch_load_b32 v116, off, s33 offset:316 -; DAGISEL-NEXT: scratch_load_b32 v117, off, s33 offset:320 -; DAGISEL-NEXT: scratch_load_b32 v118, off, s33 offset:324 -; DAGISEL-NEXT: scratch_load_b32 v119, off, s33 offset:328 -; DAGISEL-NEXT: scratch_load_b32 v128, off, s33 offset:332 -; DAGISEL-NEXT: scratch_load_b32 v129, off, s33 offset:336 -; DAGISEL-NEXT: scratch_load_b32 v130, off, s33 offset:340 -; DAGISEL-NEXT: scratch_load_b32 v131, off, s33 offset:344 -; DAGISEL-NEXT: scratch_load_b32 v132, off, s33 offset:348 -; DAGISEL-NEXT: scratch_load_b32 v133, off, s33 offset:352 -; DAGISEL-NEXT: scratch_load_b32 v134, off, s33 offset:356 -; DAGISEL-NEXT: scratch_load_b32 v135, off, s33 offset:360 -; DAGISEL-NEXT: scratch_load_b32 v144, off, s33 offset:364 -; DAGISEL-NEXT: scratch_load_b32 v145, off, s33 offset:368 -; DAGISEL-NEXT: scratch_load_b32 v146, off, s33 offset:372 -; DAGISEL-NEXT: scratch_load_b32 v147, off, s33 offset:376 -; DAGISEL-NEXT: scratch_load_b32 v148, off, s33 offset:380 -; DAGISEL-NEXT: scratch_load_b32 v149, off, s33 offset:384 -; DAGISEL-NEXT: scratch_load_b32 v150, off, s33 offset:388 -; DAGISEL-NEXT: scratch_load_b32 v151, off, s33 offset:392 -; DAGISEL-NEXT: s_clause 0x1f -; DAGISEL-NEXT: scratch_load_b32 v160, off, s33 offset:396 -; DAGISEL-NEXT: scratch_load_b32 v161, off, s33 offset:400 -; DAGISEL-NEXT: scratch_load_b32 v162, off, s33 offset:404 -; DAGISEL-NEXT: scratch_load_b32 v163, off, s33 offset:408 -; DAGISEL-NEXT: scratch_load_b32 v164, off, s33 offset:412 -; DAGISEL-NEXT: scratch_load_b32 v165, off, s33 offset:416 -; DAGISEL-NEXT: scratch_load_b32 v166, off, s33 offset:420 -; DAGISEL-NEXT: scratch_load_b32 v167, off, s33 offset:424 -; DAGISEL-NEXT: scratch_load_b32 v176, off, s33 offset:428 -; DAGISEL-NEXT: scratch_load_b32 v177, off, s33 offset:432 -; DAGISEL-NEXT: scratch_load_b32 v178, off, s33 offset:436 -; DAGISEL-NEXT: scratch_load_b32 v179, off, s33 offset:440 -; DAGISEL-NEXT: scratch_load_b32 v180, off, s33 offset:444 -; DAGISEL-NEXT: scratch_load_b32 v181, off, s33 offset:448 -; DAGISEL-NEXT: scratch_load_b32 v182, off, s33 offset:452 -; DAGISEL-NEXT: scratch_load_b32 v183, off, s33 offset:456 -; DAGISEL-NEXT: scratch_load_b32 v192, off, s33 offset:460 -; DAGISEL-NEXT: scratch_load_b32 v193, off, s33 offset:464 -; DAGISEL-NEXT: scratch_load_b32 v194, off, s33 offset:468 -; DAGISEL-NEXT: scratch_load_b32 v195, off, s33 offset:472 -; DAGISEL-NEXT: scratch_load_b32 v196, off, s33 offset:476 -; DAGISEL-NEXT: scratch_load_b32 v197, off, s33 offset:480 -; DAGISEL-NEXT: scratch_load_b32 v198, off, s33 offset:484 -; DAGISEL-NEXT: scratch_load_b32 v199, off, s33 offset:488 -; DAGISEL-NEXT: scratch_load_b32 v208, off, s33 offset:492 -; DAGISEL-NEXT: scratch_load_b32 v209, off, s33 offset:496 -; DAGISEL-NEXT: scratch_load_b32 v210, off, s33 offset:500 -; DAGISEL-NEXT: scratch_load_b32 v211, off, s33 offset:504 -; DAGISEL-NEXT: scratch_load_b32 v212, off, s33 offset:508 -; DAGISEL-NEXT: scratch_load_b32 v213, off, s33 offset:512 -; DAGISEL-NEXT: scratch_load_b32 v214, off, s33 offset:516 -; DAGISEL-NEXT: scratch_load_b32 v215, off, s33 offset:520 -; DAGISEL-NEXT: s_clause 0xf -; DAGISEL-NEXT: scratch_load_b32 v224, off, s33 offset:524 -; DAGISEL-NEXT: scratch_load_b32 v225, off, s33 offset:528 -; DAGISEL-NEXT: scratch_load_b32 v226, off, s33 offset:532 -; DAGISEL-NEXT: scratch_load_b32 v227, off, s33 offset:536 -; DAGISEL-NEXT: scratch_load_b32 v228, off, s33 offset:540 -; DAGISEL-NEXT: scratch_load_b32 v229, off, s33 offset:544 -; DAGISEL-NEXT: scratch_load_b32 v230, off, s33 offset:548 -; DAGISEL-NEXT: scratch_load_b32 v231, off, s33 offset:552 -; DAGISEL-NEXT: scratch_load_b32 v240, off, s33 offset:556 -; DAGISEL-NEXT: scratch_load_b32 v241, off, s33 offset:560 -; DAGISEL-NEXT: scratch_load_b32 v242, off, s33 offset:564 -; DAGISEL-NEXT: scratch_load_b32 v243, off, s33 offset:568 -; DAGISEL-NEXT: scratch_load_b32 v244, off, s33 offset:572 -; DAGISEL-NEXT: scratch_load_b32 v245, off, s33 offset:576 -; DAGISEL-NEXT: scratch_load_b32 v246, off, s33 offset:580 -; DAGISEL-NEXT: scratch_load_b32 v247, off, s33 offset:584 -; DAGISEL-NEXT: s_mov_b32 exec_lo, s4 -; DAGISEL-NEXT: s_mov_b32 s33, s0 -; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; DAGISEL-NEXT: s_wait_alu 0xfffe -; DAGISEL-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: call_from_whole_wave: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GISEL-NEXT: s_wait_expcnt 0x0 -; GISEL-NEXT: s_wait_samplecnt 0x0 -; GISEL-NEXT: s_wait_bvhcnt 0x0 -; GISEL-NEXT: s_wait_kmcnt 0x0 -; GISEL-NEXT: s_mov_b32 s0, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_xor_saveexec_b32 s4, -1 -; GISEL-NEXT: s_clause 0x1f -; GISEL-NEXT: scratch_store_b32 off, v0, s33 offset:4 -; GISEL-NEXT: scratch_store_b32 off, v1, s33 offset:8 -; GISEL-NEXT: scratch_store_b32 off, v2, s33 offset:12 -; GISEL-NEXT: scratch_store_b32 off, v3, s33 offset:16 -; GISEL-NEXT: scratch_store_b32 off, v4, s33 offset:20 -; GISEL-NEXT: scratch_store_b32 off, v5, s33 offset:24 -; GISEL-NEXT: scratch_store_b32 off, v6, s33 offset:28 -; GISEL-NEXT: scratch_store_b32 off, v7, s33 offset:32 -; GISEL-NEXT: scratch_store_b32 off, v8, s33 offset:36 -; GISEL-NEXT: scratch_store_b32 off, v9, s33 offset:40 -; GISEL-NEXT: scratch_store_b32 off, v10, s33 offset:44 -; GISEL-NEXT: scratch_store_b32 off, v11, s33 offset:48 -; GISEL-NEXT: scratch_store_b32 off, v12, s33 offset:52 -; GISEL-NEXT: scratch_store_b32 off, v13, s33 offset:56 -; GISEL-NEXT: scratch_store_b32 off, v14, s33 offset:60 -; GISEL-NEXT: scratch_store_b32 off, v15, s33 offset:64 -; GISEL-NEXT: scratch_store_b32 off, v16, s33 offset:68 -; GISEL-NEXT: scratch_store_b32 off, v17, s33 offset:72 -; GISEL-NEXT: scratch_store_b32 off, v18, s33 offset:76 -; GISEL-NEXT: scratch_store_b32 off, v19, s33 offset:80 -; GISEL-NEXT: scratch_store_b32 off, v20, s33 offset:84 -; GISEL-NEXT: scratch_store_b32 off, v21, s33 offset:88 -; GISEL-NEXT: scratch_store_b32 off, v22, s33 offset:92 -; GISEL-NEXT: scratch_store_b32 off, v23, s33 offset:96 -; GISEL-NEXT: scratch_store_b32 off, v24, s33 offset:100 -; GISEL-NEXT: scratch_store_b32 off, v25, s33 offset:104 -; GISEL-NEXT: scratch_store_b32 off, v26, s33 offset:108 -; GISEL-NEXT: scratch_store_b32 off, v27, s33 offset:112 -; GISEL-NEXT: scratch_store_b32 off, v28, s33 offset:116 -; GISEL-NEXT: scratch_store_b32 off, v29, s33 offset:120 -; GISEL-NEXT: scratch_store_b32 off, v30, s33 offset:124 -; GISEL-NEXT: scratch_store_b32 off, v31, s33 offset:128 -; GISEL-NEXT: s_clause 0x1f -; GISEL-NEXT: scratch_store_b32 off, v32, s33 offset:132 -; GISEL-NEXT: scratch_store_b32 off, v33, s33 offset:136 -; GISEL-NEXT: scratch_store_b32 off, v34, s33 offset:140 -; GISEL-NEXT: scratch_store_b32 off, v35, s33 offset:144 -; GISEL-NEXT: scratch_store_b32 off, v36, s33 offset:148 -; GISEL-NEXT: scratch_store_b32 off, v37, s33 offset:152 -; GISEL-NEXT: scratch_store_b32 off, v38, s33 offset:156 -; GISEL-NEXT: scratch_store_b32 off, v39, s33 offset:160 -; GISEL-NEXT: scratch_store_b32 off, v48, s33 offset:172 -; GISEL-NEXT: scratch_store_b32 off, v49, s33 offset:176 -; GISEL-NEXT: scratch_store_b32 off, v50, s33 offset:180 -; GISEL-NEXT: scratch_store_b32 off, v51, s33 offset:184 -; GISEL-NEXT: scratch_store_b32 off, v52, s33 offset:188 -; GISEL-NEXT: scratch_store_b32 off, v53, s33 offset:192 -; GISEL-NEXT: scratch_store_b32 off, v54, s33 offset:196 -; GISEL-NEXT: scratch_store_b32 off, v55, s33 offset:200 -; GISEL-NEXT: scratch_store_b32 off, v64, s33 offset:204 -; GISEL-NEXT: scratch_store_b32 off, v65, s33 offset:208 -; GISEL-NEXT: scratch_store_b32 off, v66, s33 offset:212 -; GISEL-NEXT: scratch_store_b32 off, v67, s33 offset:216 -; GISEL-NEXT: scratch_store_b32 off, v68, s33 offset:220 -; GISEL-NEXT: scratch_store_b32 off, v69, s33 offset:224 -; GISEL-NEXT: scratch_store_b32 off, v70, s33 offset:228 -; GISEL-NEXT: scratch_store_b32 off, v71, s33 offset:232 -; GISEL-NEXT: scratch_store_b32 off, v80, s33 offset:236 -; GISEL-NEXT: scratch_store_b32 off, v81, s33 offset:240 -; GISEL-NEXT: scratch_store_b32 off, v82, s33 offset:244 -; GISEL-NEXT: scratch_store_b32 off, v83, s33 offset:248 -; GISEL-NEXT: scratch_store_b32 off, v84, s33 offset:252 -; GISEL-NEXT: scratch_store_b32 off, v85, s33 offset:256 -; GISEL-NEXT: scratch_store_b32 off, v86, s33 offset:260 -; GISEL-NEXT: scratch_store_b32 off, v87, s33 offset:264 -; GISEL-NEXT: s_clause 0x1f -; GISEL-NEXT: scratch_store_b32 off, v96, s33 offset:268 -; GISEL-NEXT: scratch_store_b32 off, v97, s33 offset:272 -; GISEL-NEXT: scratch_store_b32 off, v98, s33 offset:276 -; GISEL-NEXT: scratch_store_b32 off, v99, s33 offset:280 -; GISEL-NEXT: scratch_store_b32 off, v100, s33 offset:284 -; GISEL-NEXT: scratch_store_b32 off, v101, s33 offset:288 -; GISEL-NEXT: scratch_store_b32 off, v102, s33 offset:292 -; GISEL-NEXT: scratch_store_b32 off, v103, s33 offset:296 -; GISEL-NEXT: scratch_store_b32 off, v112, s33 offset:300 -; GISEL-NEXT: scratch_store_b32 off, v113, s33 offset:304 -; GISEL-NEXT: scratch_store_b32 off, v114, s33 offset:308 -; GISEL-NEXT: scratch_store_b32 off, v115, s33 offset:312 -; GISEL-NEXT: scratch_store_b32 off, v116, s33 offset:316 -; GISEL-NEXT: scratch_store_b32 off, v117, s33 offset:320 -; GISEL-NEXT: scratch_store_b32 off, v118, s33 offset:324 -; GISEL-NEXT: scratch_store_b32 off, v119, s33 offset:328 -; GISEL-NEXT: scratch_store_b32 off, v128, s33 offset:332 -; GISEL-NEXT: scratch_store_b32 off, v129, s33 offset:336 -; GISEL-NEXT: scratch_store_b32 off, v130, s33 offset:340 -; GISEL-NEXT: scratch_store_b32 off, v131, s33 offset:344 -; GISEL-NEXT: scratch_store_b32 off, v132, s33 offset:348 -; GISEL-NEXT: scratch_store_b32 off, v133, s33 offset:352 -; GISEL-NEXT: scratch_store_b32 off, v134, s33 offset:356 -; GISEL-NEXT: scratch_store_b32 off, v135, s33 offset:360 -; GISEL-NEXT: scratch_store_b32 off, v144, s33 offset:364 -; GISEL-NEXT: scratch_store_b32 off, v145, s33 offset:368 -; GISEL-NEXT: scratch_store_b32 off, v146, s33 offset:372 -; GISEL-NEXT: scratch_store_b32 off, v147, s33 offset:376 -; GISEL-NEXT: scratch_store_b32 off, v148, s33 offset:380 -; GISEL-NEXT: scratch_store_b32 off, v149, s33 offset:384 -; GISEL-NEXT: scratch_store_b32 off, v150, s33 offset:388 -; GISEL-NEXT: scratch_store_b32 off, v151, s33 offset:392 -; GISEL-NEXT: s_clause 0x1f -; GISEL-NEXT: scratch_store_b32 off, v160, s33 offset:396 -; GISEL-NEXT: scratch_store_b32 off, v161, s33 offset:400 -; GISEL-NEXT: scratch_store_b32 off, v162, s33 offset:404 -; GISEL-NEXT: scratch_store_b32 off, v163, s33 offset:408 -; GISEL-NEXT: scratch_store_b32 off, v164, s33 offset:412 -; GISEL-NEXT: scratch_store_b32 off, v165, s33 offset:416 -; GISEL-NEXT: scratch_store_b32 off, v166, s33 offset:420 -; GISEL-NEXT: scratch_store_b32 off, v167, s33 offset:424 -; GISEL-NEXT: scratch_store_b32 off, v176, s33 offset:428 -; GISEL-NEXT: scratch_store_b32 off, v177, s33 offset:432 -; GISEL-NEXT: scratch_store_b32 off, v178, s33 offset:436 -; GISEL-NEXT: scratch_store_b32 off, v179, s33 offset:440 -; GISEL-NEXT: scratch_store_b32 off, v180, s33 offset:444 -; GISEL-NEXT: scratch_store_b32 off, v181, s33 offset:448 -; GISEL-NEXT: scratch_store_b32 off, v182, s33 offset:452 -; GISEL-NEXT: scratch_store_b32 off, v183, s33 offset:456 -; GISEL-NEXT: scratch_store_b32 off, v192, s33 offset:460 -; GISEL-NEXT: scratch_store_b32 off, v193, s33 offset:464 -; GISEL-NEXT: scratch_store_b32 off, v194, s33 offset:468 -; GISEL-NEXT: scratch_store_b32 off, v195, s33 offset:472 -; GISEL-NEXT: scratch_store_b32 off, v196, s33 offset:476 -; GISEL-NEXT: scratch_store_b32 off, v197, s33 offset:480 -; GISEL-NEXT: scratch_store_b32 off, v198, s33 offset:484 -; GISEL-NEXT: scratch_store_b32 off, v199, s33 offset:488 -; GISEL-NEXT: scratch_store_b32 off, v208, s33 offset:492 -; GISEL-NEXT: scratch_store_b32 off, v209, s33 offset:496 -; GISEL-NEXT: scratch_store_b32 off, v210, s33 offset:500 -; GISEL-NEXT: scratch_store_b32 off, v211, s33 offset:504 -; GISEL-NEXT: scratch_store_b32 off, v212, s33 offset:508 -; GISEL-NEXT: scratch_store_b32 off, v213, s33 offset:512 -; GISEL-NEXT: scratch_store_b32 off, v214, s33 offset:516 -; GISEL-NEXT: scratch_store_b32 off, v215, s33 offset:520 -; GISEL-NEXT: s_clause 0xf -; GISEL-NEXT: scratch_store_b32 off, v224, s33 offset:524 -; GISEL-NEXT: scratch_store_b32 off, v225, s33 offset:528 -; GISEL-NEXT: scratch_store_b32 off, v226, s33 offset:532 -; GISEL-NEXT: scratch_store_b32 off, v227, s33 offset:536 -; GISEL-NEXT: scratch_store_b32 off, v228, s33 offset:540 -; GISEL-NEXT: scratch_store_b32 off, v229, s33 offset:544 -; GISEL-NEXT: scratch_store_b32 off, v230, s33 offset:548 -; GISEL-NEXT: scratch_store_b32 off, v231, s33 offset:552 -; GISEL-NEXT: scratch_store_b32 off, v240, s33 offset:556 -; GISEL-NEXT: scratch_store_b32 off, v241, s33 offset:560 -; GISEL-NEXT: scratch_store_b32 off, v242, s33 offset:564 -; GISEL-NEXT: scratch_store_b32 off, v243, s33 offset:568 -; GISEL-NEXT: scratch_store_b32 off, v244, s33 offset:572 -; GISEL-NEXT: scratch_store_b32 off, v245, s33 offset:576 -; GISEL-NEXT: scratch_store_b32 off, v246, s33 offset:580 -; GISEL-NEXT: scratch_store_b32 off, v247, s33 offset:584 -; GISEL-NEXT: s_mov_b32 exec_lo, -1 -; GISEL-NEXT: s_clause 0x2 -; GISEL-NEXT: scratch_store_b32 off, v42, s33 -; GISEL-NEXT: scratch_store_b32 off, v40, s33 offset:164 -; GISEL-NEXT: scratch_store_b32 off, v41, s33 offset:168 -; GISEL-NEXT: s_wait_alu 0xfffe -; GISEL-NEXT: v_writelane_b32 v42, s0, 3 -; GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo -; GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi -; GISEL-NEXT: s_addk_co_i32 s32, 0x250 -; GISEL-NEXT: v_dual_mov_b32 v40, v8 :: v_dual_mov_b32 v41, v9 -; GISEL-NEXT: v_writelane_b32 v42, s4, 0 -; GISEL-NEXT: v_writelane_b32 v42, s30, 1 -; GISEL-NEXT: v_writelane_b32 v42, s31, 2 -; GISEL-NEXT: s_wait_alu 0xfffe -; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL-NEXT: flat_store_b32 v[40:41], v0 -; GISEL-NEXT: v_readlane_b32 s31, v42, 2 -; GISEL-NEXT: v_readlane_b32 s30, v42, 1 -; GISEL-NEXT: v_readlane_b32 s4, v42, 0 -; GISEL-NEXT: v_readlane_b32 s0, v42, 3 -; GISEL-NEXT: s_clause 0x2 -; GISEL-NEXT: scratch_load_b32 v42, off, s33 -; GISEL-NEXT: scratch_load_b32 v40, off, s33 offset:164 -; GISEL-NEXT: scratch_load_b32 v41, off, s33 offset:168 -; GISEL-NEXT: s_mov_b32 s32, s33 -; GISEL-NEXT: s_xor_b32 exec_lo, s4, -1 -; GISEL-NEXT: s_clause 0x1f -; GISEL-NEXT: scratch_load_b32 v0, off, s33 offset:4 -; GISEL-NEXT: scratch_load_b32 v1, off, s33 offset:8 -; GISEL-NEXT: scratch_load_b32 v2, off, s33 offset:12 -; GISEL-NEXT: scratch_load_b32 v3, off, s33 offset:16 -; GISEL-NEXT: scratch_load_b32 v4, off, s33 offset:20 -; GISEL-NEXT: scratch_load_b32 v5, off, s33 offset:24 -; GISEL-NEXT: scratch_load_b32 v6, off, s33 offset:28 -; GISEL-NEXT: scratch_load_b32 v7, off, s33 offset:32 -; GISEL-NEXT: scratch_load_b32 v8, off, s33 offset:36 -; GISEL-NEXT: scratch_load_b32 v9, off, s33 offset:40 -; GISEL-NEXT: scratch_load_b32 v10, off, s33 offset:44 -; GISEL-NEXT: scratch_load_b32 v11, off, s33 offset:48 -; GISEL-NEXT: scratch_load_b32 v12, off, s33 offset:52 -; GISEL-NEXT: scratch_load_b32 v13, off, s33 offset:56 -; GISEL-NEXT: scratch_load_b32 v14, off, s33 offset:60 -; GISEL-NEXT: scratch_load_b32 v15, off, s33 offset:64 -; GISEL-NEXT: scratch_load_b32 v16, off, s33 offset:68 -; GISEL-NEXT: scratch_load_b32 v17, off, s33 offset:72 -; GISEL-NEXT: scratch_load_b32 v18, off, s33 offset:76 -; GISEL-NEXT: scratch_load_b32 v19, off, s33 offset:80 -; GISEL-NEXT: scratch_load_b32 v20, off, s33 offset:84 -; GISEL-NEXT: scratch_load_b32 v21, off, s33 offset:88 -; GISEL-NEXT: scratch_load_b32 v22, off, s33 offset:92 -; GISEL-NEXT: scratch_load_b32 v23, off, s33 offset:96 -; GISEL-NEXT: scratch_load_b32 v24, off, s33 offset:100 -; GISEL-NEXT: scratch_load_b32 v25, off, s33 offset:104 -; GISEL-NEXT: scratch_load_b32 v26, off, s33 offset:108 -; GISEL-NEXT: scratch_load_b32 v27, off, s33 offset:112 -; GISEL-NEXT: scratch_load_b32 v28, off, s33 offset:116 -; GISEL-NEXT: scratch_load_b32 v29, off, s33 offset:120 -; GISEL-NEXT: scratch_load_b32 v30, off, s33 offset:124 -; GISEL-NEXT: scratch_load_b32 v31, off, s33 offset:128 -; GISEL-NEXT: s_clause 0x1f -; GISEL-NEXT: scratch_load_b32 v32, off, s33 offset:132 -; GISEL-NEXT: scratch_load_b32 v33, off, s33 offset:136 -; GISEL-NEXT: scratch_load_b32 v34, off, s33 offset:140 -; GISEL-NEXT: scratch_load_b32 v35, off, s33 offset:144 -; GISEL-NEXT: scratch_load_b32 v36, off, s33 offset:148 -; GISEL-NEXT: scratch_load_b32 v37, off, s33 offset:152 -; GISEL-NEXT: scratch_load_b32 v38, off, s33 offset:156 -; GISEL-NEXT: scratch_load_b32 v39, off, s33 offset:160 -; GISEL-NEXT: scratch_load_b32 v48, off, s33 offset:172 -; GISEL-NEXT: scratch_load_b32 v49, off, s33 offset:176 -; GISEL-NEXT: scratch_load_b32 v50, off, s33 offset:180 -; GISEL-NEXT: scratch_load_b32 v51, off, s33 offset:184 -; GISEL-NEXT: scratch_load_b32 v52, off, s33 offset:188 -; GISEL-NEXT: scratch_load_b32 v53, off, s33 offset:192 -; GISEL-NEXT: scratch_load_b32 v54, off, s33 offset:196 -; GISEL-NEXT: scratch_load_b32 v55, off, s33 offset:200 -; GISEL-NEXT: scratch_load_b32 v64, off, s33 offset:204 -; GISEL-NEXT: scratch_load_b32 v65, off, s33 offset:208 -; GISEL-NEXT: scratch_load_b32 v66, off, s33 offset:212 -; GISEL-NEXT: scratch_load_b32 v67, off, s33 offset:216 -; GISEL-NEXT: scratch_load_b32 v68, off, s33 offset:220 -; GISEL-NEXT: scratch_load_b32 v69, off, s33 offset:224 -; GISEL-NEXT: scratch_load_b32 v70, off, s33 offset:228 -; GISEL-NEXT: scratch_load_b32 v71, off, s33 offset:232 -; GISEL-NEXT: scratch_load_b32 v80, off, s33 offset:236 -; GISEL-NEXT: scratch_load_b32 v81, off, s33 offset:240 -; GISEL-NEXT: scratch_load_b32 v82, off, s33 offset:244 -; GISEL-NEXT: scratch_load_b32 v83, off, s33 offset:248 -; GISEL-NEXT: scratch_load_b32 v84, off, s33 offset:252 -; GISEL-NEXT: scratch_load_b32 v85, off, s33 offset:256 -; GISEL-NEXT: scratch_load_b32 v86, off, s33 offset:260 -; GISEL-NEXT: scratch_load_b32 v87, off, s33 offset:264 -; GISEL-NEXT: s_clause 0x1f -; GISEL-NEXT: scratch_load_b32 v96, off, s33 offset:268 -; GISEL-NEXT: scratch_load_b32 v97, off, s33 offset:272 -; GISEL-NEXT: scratch_load_b32 v98, off, s33 offset:276 -; GISEL-NEXT: scratch_load_b32 v99, off, s33 offset:280 -; GISEL-NEXT: scratch_load_b32 v100, off, s33 offset:284 -; GISEL-NEXT: scratch_load_b32 v101, off, s33 offset:288 -; GISEL-NEXT: scratch_load_b32 v102, off, s33 offset:292 -; GISEL-NEXT: scratch_load_b32 v103, off, s33 offset:296 -; GISEL-NEXT: scratch_load_b32 v112, off, s33 offset:300 -; GISEL-NEXT: scratch_load_b32 v113, off, s33 offset:304 -; GISEL-NEXT: scratch_load_b32 v114, off, s33 offset:308 -; GISEL-NEXT: scratch_load_b32 v115, off, s33 offset:312 -; GISEL-NEXT: scratch_load_b32 v116, off, s33 offset:316 -; GISEL-NEXT: scratch_load_b32 v117, off, s33 offset:320 -; GISEL-NEXT: scratch_load_b32 v118, off, s33 offset:324 -; GISEL-NEXT: scratch_load_b32 v119, off, s33 offset:328 -; GISEL-NEXT: scratch_load_b32 v128, off, s33 offset:332 -; GISEL-NEXT: scratch_load_b32 v129, off, s33 offset:336 -; GISEL-NEXT: scratch_load_b32 v130, off, s33 offset:340 -; GISEL-NEXT: scratch_load_b32 v131, off, s33 offset:344 -; GISEL-NEXT: scratch_load_b32 v132, off, s33 offset:348 -; GISEL-NEXT: scratch_load_b32 v133, off, s33 offset:352 -; GISEL-NEXT: scratch_load_b32 v134, off, s33 offset:356 -; GISEL-NEXT: scratch_load_b32 v135, off, s33 offset:360 -; GISEL-NEXT: scratch_load_b32 v144, off, s33 offset:364 -; GISEL-NEXT: scratch_load_b32 v145, off, s33 offset:368 -; GISEL-NEXT: scratch_load_b32 v146, off, s33 offset:372 -; GISEL-NEXT: scratch_load_b32 v147, off, s33 offset:376 -; GISEL-NEXT: scratch_load_b32 v148, off, s33 offset:380 -; GISEL-NEXT: scratch_load_b32 v149, off, s33 offset:384 -; GISEL-NEXT: scratch_load_b32 v150, off, s33 offset:388 -; GISEL-NEXT: scratch_load_b32 v151, off, s33 offset:392 -; GISEL-NEXT: s_clause 0x1f -; GISEL-NEXT: scratch_load_b32 v160, off, s33 offset:396 -; GISEL-NEXT: scratch_load_b32 v161, off, s33 offset:400 -; GISEL-NEXT: scratch_load_b32 v162, off, s33 offset:404 -; GISEL-NEXT: scratch_load_b32 v163, off, s33 offset:408 -; GISEL-NEXT: scratch_load_b32 v164, off, s33 offset:412 -; GISEL-NEXT: scratch_load_b32 v165, off, s33 offset:416 -; GISEL-NEXT: scratch_load_b32 v166, off, s33 offset:420 -; GISEL-NEXT: scratch_load_b32 v167, off, s33 offset:424 -; GISEL-NEXT: scratch_load_b32 v176, off, s33 offset:428 -; GISEL-NEXT: scratch_load_b32 v177, off, s33 offset:432 -; GISEL-NEXT: scratch_load_b32 v178, off, s33 offset:436 -; GISEL-NEXT: scratch_load_b32 v179, off, s33 offset:440 -; GISEL-NEXT: scratch_load_b32 v180, off, s33 offset:444 -; GISEL-NEXT: scratch_load_b32 v181, off, s33 offset:448 -; GISEL-NEXT: scratch_load_b32 v182, off, s33 offset:452 -; GISEL-NEXT: scratch_load_b32 v183, off, s33 offset:456 -; GISEL-NEXT: scratch_load_b32 v192, off, s33 offset:460 -; GISEL-NEXT: scratch_load_b32 v193, off, s33 offset:464 -; GISEL-NEXT: scratch_load_b32 v194, off, s33 offset:468 -; GISEL-NEXT: scratch_load_b32 v195, off, s33 offset:472 -; GISEL-NEXT: scratch_load_b32 v196, off, s33 offset:476 -; GISEL-NEXT: scratch_load_b32 v197, off, s33 offset:480 -; GISEL-NEXT: scratch_load_b32 v198, off, s33 offset:484 -; GISEL-NEXT: scratch_load_b32 v199, off, s33 offset:488 -; GISEL-NEXT: scratch_load_b32 v208, off, s33 offset:492 -; GISEL-NEXT: scratch_load_b32 v209, off, s33 offset:496 -; GISEL-NEXT: scratch_load_b32 v210, off, s33 offset:500 -; GISEL-NEXT: scratch_load_b32 v211, off, s33 offset:504 -; GISEL-NEXT: scratch_load_b32 v212, off, s33 offset:508 -; GISEL-NEXT: scratch_load_b32 v213, off, s33 offset:512 -; GISEL-NEXT: scratch_load_b32 v214, off, s33 offset:516 -; GISEL-NEXT: scratch_load_b32 v215, off, s33 offset:520 -; GISEL-NEXT: s_clause 0xf -; GISEL-NEXT: scratch_load_b32 v224, off, s33 offset:524 -; GISEL-NEXT: scratch_load_b32 v225, off, s33 offset:528 -; GISEL-NEXT: scratch_load_b32 v226, off, s33 offset:532 -; GISEL-NEXT: scratch_load_b32 v227, off, s33 offset:536 -; GISEL-NEXT: scratch_load_b32 v228, off, s33 offset:540 -; GISEL-NEXT: scratch_load_b32 v229, off, s33 offset:544 -; GISEL-NEXT: scratch_load_b32 v230, off, s33 offset:548 -; GISEL-NEXT: scratch_load_b32 v231, off, s33 offset:552 -; GISEL-NEXT: scratch_load_b32 v240, off, s33 offset:556 -; GISEL-NEXT: scratch_load_b32 v241, off, s33 offset:560 -; GISEL-NEXT: scratch_load_b32 v242, off, s33 offset:564 -; GISEL-NEXT: scratch_load_b32 v243, off, s33 offset:568 -; GISEL-NEXT: scratch_load_b32 v244, off, s33 offset:572 -; GISEL-NEXT: scratch_load_b32 v245, off, s33 offset:576 -; GISEL-NEXT: scratch_load_b32 v246, off, s33 offset:580 -; GISEL-NEXT: scratch_load_b32 v247, off, s33 offset:584 -; GISEL-NEXT: s_mov_b32 exec_lo, s4 -; GISEL-NEXT: s_mov_b32 s33, s0 -; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 -; GISEL-NEXT: s_wait_alu 0xfffe -; GISEL-NEXT: s_setpc_b64 s[30:31] -; -; DAGISEL64-LABEL: call_from_whole_wave: -; DAGISEL64: ; %bb.0: -; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 -; DAGISEL64-NEXT: s_wait_expcnt 0x0 -; DAGISEL64-NEXT: s_wait_samplecnt 0x0 -; DAGISEL64-NEXT: s_wait_bvhcnt 0x0 -; DAGISEL64-NEXT: s_wait_kmcnt 0x0 -; DAGISEL64-NEXT: s_mov_b32 s0, s33 -; DAGISEL64-NEXT: s_mov_b32 s33, s32 -; DAGISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; DAGISEL64-NEXT: s_clause 0x1f -; DAGISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4 -; DAGISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8 -; DAGISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12 -; DAGISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16 -; DAGISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20 -; DAGISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24 -; DAGISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28 -; DAGISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32 -; DAGISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36 -; DAGISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40 -; DAGISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44 -; DAGISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48 -; DAGISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52 -; DAGISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56 -; DAGISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60 -; DAGISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64 -; DAGISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68 -; DAGISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72 -; DAGISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76 -; DAGISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80 -; DAGISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84 -; DAGISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88 -; DAGISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92 -; DAGISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96 -; DAGISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100 -; DAGISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104 -; DAGISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108 -; DAGISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112 -; DAGISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116 -; DAGISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120 -; DAGISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124 -; DAGISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128 -; DAGISEL64-NEXT: s_clause 0x1f -; DAGISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132 -; DAGISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136 -; DAGISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140 -; DAGISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144 -; DAGISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148 -; DAGISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152 -; DAGISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156 -; DAGISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160 -; DAGISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:172 -; DAGISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:176 -; DAGISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:180 -; DAGISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:184 -; DAGISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:188 -; DAGISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:192 -; DAGISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:196 -; DAGISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:200 -; DAGISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:204 -; DAGISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:208 -; DAGISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:212 -; DAGISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:216 -; DAGISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:220 -; DAGISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:224 -; DAGISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:228 -; DAGISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:232 -; DAGISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:236 -; DAGISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:240 -; DAGISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:244 -; DAGISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:248 -; DAGISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:252 -; DAGISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:256 -; DAGISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:260 -; DAGISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:264 -; DAGISEL64-NEXT: s_clause 0x1f -; DAGISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:268 -; DAGISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:272 -; DAGISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:276 -; DAGISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:280 -; DAGISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:284 -; DAGISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:288 -; DAGISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:292 -; DAGISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:296 -; DAGISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:300 -; DAGISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:304 -; DAGISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:308 -; DAGISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:312 -; DAGISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:316 -; DAGISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:320 -; DAGISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:324 -; DAGISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:328 -; DAGISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:332 -; DAGISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:336 -; DAGISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:340 -; DAGISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:344 -; DAGISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:348 -; DAGISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:352 -; DAGISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:356 -; DAGISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:360 -; DAGISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:364 -; DAGISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:368 -; DAGISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:372 -; DAGISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:376 -; DAGISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:380 -; DAGISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:384 -; DAGISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:388 -; DAGISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:392 -; DAGISEL64-NEXT: s_clause 0x1f -; DAGISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:396 -; DAGISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:400 -; DAGISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:404 -; DAGISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:408 -; DAGISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:412 -; DAGISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:416 -; DAGISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:420 -; DAGISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:424 -; DAGISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:428 -; DAGISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:432 -; DAGISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:436 -; DAGISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:440 -; DAGISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:444 -; DAGISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:448 -; DAGISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:452 -; DAGISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:456 -; DAGISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:460 -; DAGISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:464 -; DAGISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:468 -; DAGISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:472 -; DAGISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:476 -; DAGISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:480 -; DAGISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:484 -; DAGISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:488 -; DAGISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:492 -; DAGISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:496 -; DAGISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:500 -; DAGISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:504 -; DAGISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:508 -; DAGISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:512 -; DAGISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:516 -; DAGISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:520 -; DAGISEL64-NEXT: s_clause 0xf -; DAGISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:524 -; DAGISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:528 -; DAGISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:532 -; DAGISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:536 -; DAGISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:540 -; DAGISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:544 -; DAGISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:548 -; DAGISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:552 -; DAGISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:556 -; DAGISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:560 -; DAGISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:564 -; DAGISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:568 -; DAGISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:572 -; DAGISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:576 -; DAGISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:580 -; DAGISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:584 -; DAGISEL64-NEXT: s_mov_b64 exec, -1 -; DAGISEL64-NEXT: s_clause 0x2 -; DAGISEL64-NEXT: scratch_store_b32 off, v42, s33 -; DAGISEL64-NEXT: scratch_store_b32 off, v40, s33 offset:164 -; DAGISEL64-NEXT: scratch_store_b32 off, v41, s33 offset:168 -; DAGISEL64-NEXT: s_wait_alu 0xfffe -; DAGISEL64-NEXT: v_writelane_b32 v42, s0, 4 -; DAGISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi -; DAGISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo -; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x250 -; DAGISEL64-NEXT: v_mov_b32_e32 v41, v9 -; DAGISEL64-NEXT: v_writelane_b32 v42, s4, 0 -; DAGISEL64-NEXT: v_mov_b32_e32 v40, v8 -; DAGISEL64-NEXT: v_writelane_b32 v42, s5, 1 -; DAGISEL64-NEXT: v_writelane_b32 v42, s30, 2 -; DAGISEL64-NEXT: v_writelane_b32 v42, s31, 3 -; DAGISEL64-NEXT: s_wait_alu 0xfffe -; DAGISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] -; DAGISEL64-NEXT: flat_store_b32 v[40:41], v0 -; DAGISEL64-NEXT: v_readlane_b32 s31, v42, 3 -; DAGISEL64-NEXT: v_readlane_b32 s30, v42, 2 -; DAGISEL64-NEXT: v_readlane_b32 s5, v42, 1 -; DAGISEL64-NEXT: v_readlane_b32 s4, v42, 0 -; DAGISEL64-NEXT: v_readlane_b32 s0, v42, 4 -; DAGISEL64-NEXT: s_clause 0x2 -; DAGISEL64-NEXT: scratch_load_b32 v42, off, s33 -; DAGISEL64-NEXT: scratch_load_b32 v40, off, s33 offset:164 -; DAGISEL64-NEXT: scratch_load_b32 v41, off, s33 offset:168 -; DAGISEL64-NEXT: s_mov_b32 s32, s33 -; DAGISEL64-NEXT: s_xor_b64 exec, s[4:5], -1 -; DAGISEL64-NEXT: s_clause 0x1f -; DAGISEL64-NEXT: scratch_load_b32 v0, off, s33 offset:4 -; DAGISEL64-NEXT: scratch_load_b32 v1, off, s33 offset:8 -; DAGISEL64-NEXT: scratch_load_b32 v2, off, s33 offset:12 -; DAGISEL64-NEXT: scratch_load_b32 v3, off, s33 offset:16 -; DAGISEL64-NEXT: scratch_load_b32 v4, off, s33 offset:20 -; DAGISEL64-NEXT: scratch_load_b32 v5, off, s33 offset:24 -; DAGISEL64-NEXT: scratch_load_b32 v6, off, s33 offset:28 -; DAGISEL64-NEXT: scratch_load_b32 v7, off, s33 offset:32 -; DAGISEL64-NEXT: scratch_load_b32 v8, off, s33 offset:36 -; DAGISEL64-NEXT: scratch_load_b32 v9, off, s33 offset:40 -; DAGISEL64-NEXT: scratch_load_b32 v10, off, s33 offset:44 -; DAGISEL64-NEXT: scratch_load_b32 v11, off, s33 offset:48 -; DAGISEL64-NEXT: scratch_load_b32 v12, off, s33 offset:52 -; DAGISEL64-NEXT: scratch_load_b32 v13, off, s33 offset:56 -; DAGISEL64-NEXT: scratch_load_b32 v14, off, s33 offset:60 -; DAGISEL64-NEXT: scratch_load_b32 v15, off, s33 offset:64 -; DAGISEL64-NEXT: scratch_load_b32 v16, off, s33 offset:68 -; DAGISEL64-NEXT: scratch_load_b32 v17, off, s33 offset:72 -; DAGISEL64-NEXT: scratch_load_b32 v18, off, s33 offset:76 -; DAGISEL64-NEXT: scratch_load_b32 v19, off, s33 offset:80 -; DAGISEL64-NEXT: scratch_load_b32 v20, off, s33 offset:84 -; DAGISEL64-NEXT: scratch_load_b32 v21, off, s33 offset:88 -; DAGISEL64-NEXT: scratch_load_b32 v22, off, s33 offset:92 -; DAGISEL64-NEXT: scratch_load_b32 v23, off, s33 offset:96 -; DAGISEL64-NEXT: scratch_load_b32 v24, off, s33 offset:100 -; DAGISEL64-NEXT: scratch_load_b32 v25, off, s33 offset:104 -; DAGISEL64-NEXT: scratch_load_b32 v26, off, s33 offset:108 -; DAGISEL64-NEXT: scratch_load_b32 v27, off, s33 offset:112 -; DAGISEL64-NEXT: scratch_load_b32 v28, off, s33 offset:116 -; DAGISEL64-NEXT: scratch_load_b32 v29, off, s33 offset:120 -; DAGISEL64-NEXT: scratch_load_b32 v30, off, s33 offset:124 -; DAGISEL64-NEXT: scratch_load_b32 v31, off, s33 offset:128 -; DAGISEL64-NEXT: s_clause 0x1f -; DAGISEL64-NEXT: scratch_load_b32 v32, off, s33 offset:132 -; DAGISEL64-NEXT: scratch_load_b32 v33, off, s33 offset:136 -; DAGISEL64-NEXT: scratch_load_b32 v34, off, s33 offset:140 -; DAGISEL64-NEXT: scratch_load_b32 v35, off, s33 offset:144 -; DAGISEL64-NEXT: scratch_load_b32 v36, off, s33 offset:148 -; DAGISEL64-NEXT: scratch_load_b32 v37, off, s33 offset:152 -; DAGISEL64-NEXT: scratch_load_b32 v38, off, s33 offset:156 -; DAGISEL64-NEXT: scratch_load_b32 v39, off, s33 offset:160 -; DAGISEL64-NEXT: scratch_load_b32 v48, off, s33 offset:172 -; DAGISEL64-NEXT: scratch_load_b32 v49, off, s33 offset:176 -; DAGISEL64-NEXT: scratch_load_b32 v50, off, s33 offset:180 -; DAGISEL64-NEXT: scratch_load_b32 v51, off, s33 offset:184 -; DAGISEL64-NEXT: scratch_load_b32 v52, off, s33 offset:188 -; DAGISEL64-NEXT: scratch_load_b32 v53, off, s33 offset:192 -; DAGISEL64-NEXT: scratch_load_b32 v54, off, s33 offset:196 -; DAGISEL64-NEXT: scratch_load_b32 v55, off, s33 offset:200 -; DAGISEL64-NEXT: scratch_load_b32 v64, off, s33 offset:204 -; DAGISEL64-NEXT: scratch_load_b32 v65, off, s33 offset:208 -; DAGISEL64-NEXT: scratch_load_b32 v66, off, s33 offset:212 -; DAGISEL64-NEXT: scratch_load_b32 v67, off, s33 offset:216 -; DAGISEL64-NEXT: scratch_load_b32 v68, off, s33 offset:220 -; DAGISEL64-NEXT: scratch_load_b32 v69, off, s33 offset:224 -; DAGISEL64-NEXT: scratch_load_b32 v70, off, s33 offset:228 -; DAGISEL64-NEXT: scratch_load_b32 v71, off, s33 offset:232 -; DAGISEL64-NEXT: scratch_load_b32 v80, off, s33 offset:236 -; DAGISEL64-NEXT: scratch_load_b32 v81, off, s33 offset:240 -; DAGISEL64-NEXT: scratch_load_b32 v82, off, s33 offset:244 -; DAGISEL64-NEXT: scratch_load_b32 v83, off, s33 offset:248 -; DAGISEL64-NEXT: scratch_load_b32 v84, off, s33 offset:252 -; DAGISEL64-NEXT: scratch_load_b32 v85, off, s33 offset:256 -; DAGISEL64-NEXT: scratch_load_b32 v86, off, s33 offset:260 -; DAGISEL64-NEXT: scratch_load_b32 v87, off, s33 offset:264 -; DAGISEL64-NEXT: s_clause 0x1f -; DAGISEL64-NEXT: scratch_load_b32 v96, off, s33 offset:268 -; DAGISEL64-NEXT: scratch_load_b32 v97, off, s33 offset:272 -; DAGISEL64-NEXT: scratch_load_b32 v98, off, s33 offset:276 -; DAGISEL64-NEXT: scratch_load_b32 v99, off, s33 offset:280 -; DAGISEL64-NEXT: scratch_load_b32 v100, off, s33 offset:284 -; DAGISEL64-NEXT: scratch_load_b32 v101, off, s33 offset:288 -; DAGISEL64-NEXT: scratch_load_b32 v102, off, s33 offset:292 -; DAGISEL64-NEXT: scratch_load_b32 v103, off, s33 offset:296 -; DAGISEL64-NEXT: scratch_load_b32 v112, off, s33 offset:300 -; DAGISEL64-NEXT: scratch_load_b32 v113, off, s33 offset:304 -; DAGISEL64-NEXT: scratch_load_b32 v114, off, s33 offset:308 -; DAGISEL64-NEXT: scratch_load_b32 v115, off, s33 offset:312 -; DAGISEL64-NEXT: scratch_load_b32 v116, off, s33 offset:316 -; DAGISEL64-NEXT: scratch_load_b32 v117, off, s33 offset:320 -; DAGISEL64-NEXT: scratch_load_b32 v118, off, s33 offset:324 -; DAGISEL64-NEXT: scratch_load_b32 v119, off, s33 offset:328 -; DAGISEL64-NEXT: scratch_load_b32 v128, off, s33 offset:332 -; DAGISEL64-NEXT: scratch_load_b32 v129, off, s33 offset:336 -; DAGISEL64-NEXT: scratch_load_b32 v130, off, s33 offset:340 -; DAGISEL64-NEXT: scratch_load_b32 v131, off, s33 offset:344 -; DAGISEL64-NEXT: scratch_load_b32 v132, off, s33 offset:348 -; DAGISEL64-NEXT: scratch_load_b32 v133, off, s33 offset:352 -; DAGISEL64-NEXT: scratch_load_b32 v134, off, s33 offset:356 -; DAGISEL64-NEXT: scratch_load_b32 v135, off, s33 offset:360 -; DAGISEL64-NEXT: scratch_load_b32 v144, off, s33 offset:364 -; DAGISEL64-NEXT: scratch_load_b32 v145, off, s33 offset:368 -; DAGISEL64-NEXT: scratch_load_b32 v146, off, s33 offset:372 -; DAGISEL64-NEXT: scratch_load_b32 v147, off, s33 offset:376 -; DAGISEL64-NEXT: scratch_load_b32 v148, off, s33 offset:380 -; DAGISEL64-NEXT: scratch_load_b32 v149, off, s33 offset:384 -; DAGISEL64-NEXT: scratch_load_b32 v150, off, s33 offset:388 -; DAGISEL64-NEXT: scratch_load_b32 v151, off, s33 offset:392 -; DAGISEL64-NEXT: s_clause 0x1f -; DAGISEL64-NEXT: scratch_load_b32 v160, off, s33 offset:396 -; DAGISEL64-NEXT: scratch_load_b32 v161, off, s33 offset:400 -; DAGISEL64-NEXT: scratch_load_b32 v162, off, s33 offset:404 -; DAGISEL64-NEXT: scratch_load_b32 v163, off, s33 offset:408 -; DAGISEL64-NEXT: scratch_load_b32 v164, off, s33 offset:412 -; DAGISEL64-NEXT: scratch_load_b32 v165, off, s33 offset:416 -; DAGISEL64-NEXT: scratch_load_b32 v166, off, s33 offset:420 -; DAGISEL64-NEXT: scratch_load_b32 v167, off, s33 offset:424 -; DAGISEL64-NEXT: scratch_load_b32 v176, off, s33 offset:428 -; DAGISEL64-NEXT: scratch_load_b32 v177, off, s33 offset:432 -; DAGISEL64-NEXT: scratch_load_b32 v178, off, s33 offset:436 -; DAGISEL64-NEXT: scratch_load_b32 v179, off, s33 offset:440 -; DAGISEL64-NEXT: scratch_load_b32 v180, off, s33 offset:444 -; DAGISEL64-NEXT: scratch_load_b32 v181, off, s33 offset:448 -; DAGISEL64-NEXT: scratch_load_b32 v182, off, s33 offset:452 -; DAGISEL64-NEXT: scratch_load_b32 v183, off, s33 offset:456 -; DAGISEL64-NEXT: scratch_load_b32 v192, off, s33 offset:460 -; DAGISEL64-NEXT: scratch_load_b32 v193, off, s33 offset:464 -; DAGISEL64-NEXT: scratch_load_b32 v194, off, s33 offset:468 -; DAGISEL64-NEXT: scratch_load_b32 v195, off, s33 offset:472 -; DAGISEL64-NEXT: scratch_load_b32 v196, off, s33 offset:476 -; DAGISEL64-NEXT: scratch_load_b32 v197, off, s33 offset:480 -; DAGISEL64-NEXT: scratch_load_b32 v198, off, s33 offset:484 -; DAGISEL64-NEXT: scratch_load_b32 v199, off, s33 offset:488 -; DAGISEL64-NEXT: scratch_load_b32 v208, off, s33 offset:492 -; DAGISEL64-NEXT: scratch_load_b32 v209, off, s33 offset:496 -; DAGISEL64-NEXT: scratch_load_b32 v210, off, s33 offset:500 -; DAGISEL64-NEXT: scratch_load_b32 v211, off, s33 offset:504 -; DAGISEL64-NEXT: scratch_load_b32 v212, off, s33 offset:508 -; DAGISEL64-NEXT: scratch_load_b32 v213, off, s33 offset:512 -; DAGISEL64-NEXT: scratch_load_b32 v214, off, s33 offset:516 -; DAGISEL64-NEXT: scratch_load_b32 v215, off, s33 offset:520 -; DAGISEL64-NEXT: s_clause 0xf -; DAGISEL64-NEXT: scratch_load_b32 v224, off, s33 offset:524 -; DAGISEL64-NEXT: scratch_load_b32 v225, off, s33 offset:528 -; DAGISEL64-NEXT: scratch_load_b32 v226, off, s33 offset:532 -; DAGISEL64-NEXT: scratch_load_b32 v227, off, s33 offset:536 -; DAGISEL64-NEXT: scratch_load_b32 v228, off, s33 offset:540 -; DAGISEL64-NEXT: scratch_load_b32 v229, off, s33 offset:544 -; DAGISEL64-NEXT: scratch_load_b32 v230, off, s33 offset:548 -; DAGISEL64-NEXT: scratch_load_b32 v231, off, s33 offset:552 -; DAGISEL64-NEXT: scratch_load_b32 v240, off, s33 offset:556 -; DAGISEL64-NEXT: scratch_load_b32 v241, off, s33 offset:560 -; DAGISEL64-NEXT: scratch_load_b32 v242, off, s33 offset:564 -; DAGISEL64-NEXT: scratch_load_b32 v243, off, s33 offset:568 -; DAGISEL64-NEXT: scratch_load_b32 v244, off, s33 offset:572 -; DAGISEL64-NEXT: scratch_load_b32 v245, off, s33 offset:576 -; DAGISEL64-NEXT: scratch_load_b32 v246, off, s33 offset:580 -; DAGISEL64-NEXT: scratch_load_b32 v247, off, s33 offset:584 -; DAGISEL64-NEXT: s_mov_b64 exec, s[4:5] -; DAGISEL64-NEXT: s_mov_b32 s33, s0 -; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 -; DAGISEL64-NEXT: s_wait_alu 0xfffe -; DAGISEL64-NEXT: s_setpc_b64 s[30:31] -; -; GISEL64-LABEL: call_from_whole_wave: -; GISEL64: ; %bb.0: -; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 -; GISEL64-NEXT: s_wait_expcnt 0x0 -; GISEL64-NEXT: s_wait_samplecnt 0x0 -; GISEL64-NEXT: s_wait_bvhcnt 0x0 -; GISEL64-NEXT: s_wait_kmcnt 0x0 -; GISEL64-NEXT: s_mov_b32 s0, s33 -; GISEL64-NEXT: s_mov_b32 s33, s32 -; GISEL64-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GISEL64-NEXT: s_clause 0x1f -; GISEL64-NEXT: scratch_store_b32 off, v0, s33 offset:4 -; GISEL64-NEXT: scratch_store_b32 off, v1, s33 offset:8 -; GISEL64-NEXT: scratch_store_b32 off, v2, s33 offset:12 -; GISEL64-NEXT: scratch_store_b32 off, v3, s33 offset:16 -; GISEL64-NEXT: scratch_store_b32 off, v4, s33 offset:20 -; GISEL64-NEXT: scratch_store_b32 off, v5, s33 offset:24 -; GISEL64-NEXT: scratch_store_b32 off, v6, s33 offset:28 -; GISEL64-NEXT: scratch_store_b32 off, v7, s33 offset:32 -; GISEL64-NEXT: scratch_store_b32 off, v8, s33 offset:36 -; GISEL64-NEXT: scratch_store_b32 off, v9, s33 offset:40 -; GISEL64-NEXT: scratch_store_b32 off, v10, s33 offset:44 -; GISEL64-NEXT: scratch_store_b32 off, v11, s33 offset:48 -; GISEL64-NEXT: scratch_store_b32 off, v12, s33 offset:52 -; GISEL64-NEXT: scratch_store_b32 off, v13, s33 offset:56 -; GISEL64-NEXT: scratch_store_b32 off, v14, s33 offset:60 -; GISEL64-NEXT: scratch_store_b32 off, v15, s33 offset:64 -; GISEL64-NEXT: scratch_store_b32 off, v16, s33 offset:68 -; GISEL64-NEXT: scratch_store_b32 off, v17, s33 offset:72 -; GISEL64-NEXT: scratch_store_b32 off, v18, s33 offset:76 -; GISEL64-NEXT: scratch_store_b32 off, v19, s33 offset:80 -; GISEL64-NEXT: scratch_store_b32 off, v20, s33 offset:84 -; GISEL64-NEXT: scratch_store_b32 off, v21, s33 offset:88 -; GISEL64-NEXT: scratch_store_b32 off, v22, s33 offset:92 -; GISEL64-NEXT: scratch_store_b32 off, v23, s33 offset:96 -; GISEL64-NEXT: scratch_store_b32 off, v24, s33 offset:100 -; GISEL64-NEXT: scratch_store_b32 off, v25, s33 offset:104 -; GISEL64-NEXT: scratch_store_b32 off, v26, s33 offset:108 -; GISEL64-NEXT: scratch_store_b32 off, v27, s33 offset:112 -; GISEL64-NEXT: scratch_store_b32 off, v28, s33 offset:116 -; GISEL64-NEXT: scratch_store_b32 off, v29, s33 offset:120 -; GISEL64-NEXT: scratch_store_b32 off, v30, s33 offset:124 -; GISEL64-NEXT: scratch_store_b32 off, v31, s33 offset:128 -; GISEL64-NEXT: s_clause 0x1f -; GISEL64-NEXT: scratch_store_b32 off, v32, s33 offset:132 -; GISEL64-NEXT: scratch_store_b32 off, v33, s33 offset:136 -; GISEL64-NEXT: scratch_store_b32 off, v34, s33 offset:140 -; GISEL64-NEXT: scratch_store_b32 off, v35, s33 offset:144 -; GISEL64-NEXT: scratch_store_b32 off, v36, s33 offset:148 -; GISEL64-NEXT: scratch_store_b32 off, v37, s33 offset:152 -; GISEL64-NEXT: scratch_store_b32 off, v38, s33 offset:156 -; GISEL64-NEXT: scratch_store_b32 off, v39, s33 offset:160 -; GISEL64-NEXT: scratch_store_b32 off, v48, s33 offset:172 -; GISEL64-NEXT: scratch_store_b32 off, v49, s33 offset:176 -; GISEL64-NEXT: scratch_store_b32 off, v50, s33 offset:180 -; GISEL64-NEXT: scratch_store_b32 off, v51, s33 offset:184 -; GISEL64-NEXT: scratch_store_b32 off, v52, s33 offset:188 -; GISEL64-NEXT: scratch_store_b32 off, v53, s33 offset:192 -; GISEL64-NEXT: scratch_store_b32 off, v54, s33 offset:196 -; GISEL64-NEXT: scratch_store_b32 off, v55, s33 offset:200 -; GISEL64-NEXT: scratch_store_b32 off, v64, s33 offset:204 -; GISEL64-NEXT: scratch_store_b32 off, v65, s33 offset:208 -; GISEL64-NEXT: scratch_store_b32 off, v66, s33 offset:212 -; GISEL64-NEXT: scratch_store_b32 off, v67, s33 offset:216 -; GISEL64-NEXT: scratch_store_b32 off, v68, s33 offset:220 -; GISEL64-NEXT: scratch_store_b32 off, v69, s33 offset:224 -; GISEL64-NEXT: scratch_store_b32 off, v70, s33 offset:228 -; GISEL64-NEXT: scratch_store_b32 off, v71, s33 offset:232 -; GISEL64-NEXT: scratch_store_b32 off, v80, s33 offset:236 -; GISEL64-NEXT: scratch_store_b32 off, v81, s33 offset:240 -; GISEL64-NEXT: scratch_store_b32 off, v82, s33 offset:244 -; GISEL64-NEXT: scratch_store_b32 off, v83, s33 offset:248 -; GISEL64-NEXT: scratch_store_b32 off, v84, s33 offset:252 -; GISEL64-NEXT: scratch_store_b32 off, v85, s33 offset:256 -; GISEL64-NEXT: scratch_store_b32 off, v86, s33 offset:260 -; GISEL64-NEXT: scratch_store_b32 off, v87, s33 offset:264 -; GISEL64-NEXT: s_clause 0x1f -; GISEL64-NEXT: scratch_store_b32 off, v96, s33 offset:268 -; GISEL64-NEXT: scratch_store_b32 off, v97, s33 offset:272 -; GISEL64-NEXT: scratch_store_b32 off, v98, s33 offset:276 -; GISEL64-NEXT: scratch_store_b32 off, v99, s33 offset:280 -; GISEL64-NEXT: scratch_store_b32 off, v100, s33 offset:284 -; GISEL64-NEXT: scratch_store_b32 off, v101, s33 offset:288 -; GISEL64-NEXT: scratch_store_b32 off, v102, s33 offset:292 -; GISEL64-NEXT: scratch_store_b32 off, v103, s33 offset:296 -; GISEL64-NEXT: scratch_store_b32 off, v112, s33 offset:300 -; GISEL64-NEXT: scratch_store_b32 off, v113, s33 offset:304 -; GISEL64-NEXT: scratch_store_b32 off, v114, s33 offset:308 -; GISEL64-NEXT: scratch_store_b32 off, v115, s33 offset:312 -; GISEL64-NEXT: scratch_store_b32 off, v116, s33 offset:316 -; GISEL64-NEXT: scratch_store_b32 off, v117, s33 offset:320 -; GISEL64-NEXT: scratch_store_b32 off, v118, s33 offset:324 -; GISEL64-NEXT: scratch_store_b32 off, v119, s33 offset:328 -; GISEL64-NEXT: scratch_store_b32 off, v128, s33 offset:332 -; GISEL64-NEXT: scratch_store_b32 off, v129, s33 offset:336 -; GISEL64-NEXT: scratch_store_b32 off, v130, s33 offset:340 -; GISEL64-NEXT: scratch_store_b32 off, v131, s33 offset:344 -; GISEL64-NEXT: scratch_store_b32 off, v132, s33 offset:348 -; GISEL64-NEXT: scratch_store_b32 off, v133, s33 offset:352 -; GISEL64-NEXT: scratch_store_b32 off, v134, s33 offset:356 -; GISEL64-NEXT: scratch_store_b32 off, v135, s33 offset:360 -; GISEL64-NEXT: scratch_store_b32 off, v144, s33 offset:364 -; GISEL64-NEXT: scratch_store_b32 off, v145, s33 offset:368 -; GISEL64-NEXT: scratch_store_b32 off, v146, s33 offset:372 -; GISEL64-NEXT: scratch_store_b32 off, v147, s33 offset:376 -; GISEL64-NEXT: scratch_store_b32 off, v148, s33 offset:380 -; GISEL64-NEXT: scratch_store_b32 off, v149, s33 offset:384 -; GISEL64-NEXT: scratch_store_b32 off, v150, s33 offset:388 -; GISEL64-NEXT: scratch_store_b32 off, v151, s33 offset:392 -; GISEL64-NEXT: s_clause 0x1f -; GISEL64-NEXT: scratch_store_b32 off, v160, s33 offset:396 -; GISEL64-NEXT: scratch_store_b32 off, v161, s33 offset:400 -; GISEL64-NEXT: scratch_store_b32 off, v162, s33 offset:404 -; GISEL64-NEXT: scratch_store_b32 off, v163, s33 offset:408 -; GISEL64-NEXT: scratch_store_b32 off, v164, s33 offset:412 -; GISEL64-NEXT: scratch_store_b32 off, v165, s33 offset:416 -; GISEL64-NEXT: scratch_store_b32 off, v166, s33 offset:420 -; GISEL64-NEXT: scratch_store_b32 off, v167, s33 offset:424 -; GISEL64-NEXT: scratch_store_b32 off, v176, s33 offset:428 -; GISEL64-NEXT: scratch_store_b32 off, v177, s33 offset:432 -; GISEL64-NEXT: scratch_store_b32 off, v178, s33 offset:436 -; GISEL64-NEXT: scratch_store_b32 off, v179, s33 offset:440 -; GISEL64-NEXT: scratch_store_b32 off, v180, s33 offset:444 -; GISEL64-NEXT: scratch_store_b32 off, v181, s33 offset:448 -; GISEL64-NEXT: scratch_store_b32 off, v182, s33 offset:452 -; GISEL64-NEXT: scratch_store_b32 off, v183, s33 offset:456 -; GISEL64-NEXT: scratch_store_b32 off, v192, s33 offset:460 -; GISEL64-NEXT: scratch_store_b32 off, v193, s33 offset:464 -; GISEL64-NEXT: scratch_store_b32 off, v194, s33 offset:468 -; GISEL64-NEXT: scratch_store_b32 off, v195, s33 offset:472 -; GISEL64-NEXT: scratch_store_b32 off, v196, s33 offset:476 -; GISEL64-NEXT: scratch_store_b32 off, v197, s33 offset:480 -; GISEL64-NEXT: scratch_store_b32 off, v198, s33 offset:484 -; GISEL64-NEXT: scratch_store_b32 off, v199, s33 offset:488 -; GISEL64-NEXT: scratch_store_b32 off, v208, s33 offset:492 -; GISEL64-NEXT: scratch_store_b32 off, v209, s33 offset:496 -; GISEL64-NEXT: scratch_store_b32 off, v210, s33 offset:500 -; GISEL64-NEXT: scratch_store_b32 off, v211, s33 offset:504 -; GISEL64-NEXT: scratch_store_b32 off, v212, s33 offset:508 -; GISEL64-NEXT: scratch_store_b32 off, v213, s33 offset:512 -; GISEL64-NEXT: scratch_store_b32 off, v214, s33 offset:516 -; GISEL64-NEXT: scratch_store_b32 off, v215, s33 offset:520 -; GISEL64-NEXT: s_clause 0xf -; GISEL64-NEXT: scratch_store_b32 off, v224, s33 offset:524 -; GISEL64-NEXT: scratch_store_b32 off, v225, s33 offset:528 -; GISEL64-NEXT: scratch_store_b32 off, v226, s33 offset:532 -; GISEL64-NEXT: scratch_store_b32 off, v227, s33 offset:536 -; GISEL64-NEXT: scratch_store_b32 off, v228, s33 offset:540 -; GISEL64-NEXT: scratch_store_b32 off, v229, s33 offset:544 -; GISEL64-NEXT: scratch_store_b32 off, v230, s33 offset:548 -; GISEL64-NEXT: scratch_store_b32 off, v231, s33 offset:552 -; GISEL64-NEXT: scratch_store_b32 off, v240, s33 offset:556 -; GISEL64-NEXT: scratch_store_b32 off, v241, s33 offset:560 -; GISEL64-NEXT: scratch_store_b32 off, v242, s33 offset:564 -; GISEL64-NEXT: scratch_store_b32 off, v243, s33 offset:568 -; GISEL64-NEXT: scratch_store_b32 off, v244, s33 offset:572 -; GISEL64-NEXT: scratch_store_b32 off, v245, s33 offset:576 -; GISEL64-NEXT: scratch_store_b32 off, v246, s33 offset:580 -; GISEL64-NEXT: scratch_store_b32 off, v247, s33 offset:584 -; GISEL64-NEXT: s_mov_b64 exec, -1 -; GISEL64-NEXT: s_clause 0x2 -; GISEL64-NEXT: scratch_store_b32 off, v42, s33 -; GISEL64-NEXT: scratch_store_b32 off, v40, s33 offset:164 -; GISEL64-NEXT: scratch_store_b32 off, v41, s33 offset:168 -; GISEL64-NEXT: s_wait_alu 0xfffe -; GISEL64-NEXT: v_writelane_b32 v42, s0, 4 -; GISEL64-NEXT: s_mov_b32 s0, callee@abs32@lo -; GISEL64-NEXT: s_mov_b32 s1, callee@abs32@hi -; GISEL64-NEXT: s_addk_co_i32 s32, 0x250 -; GISEL64-NEXT: v_mov_b32_e32 v40, v8 -; GISEL64-NEXT: v_writelane_b32 v42, s4, 0 -; GISEL64-NEXT: v_mov_b32_e32 v41, v9 -; GISEL64-NEXT: v_writelane_b32 v42, s5, 1 -; GISEL64-NEXT: v_writelane_b32 v42, s30, 2 -; GISEL64-NEXT: v_writelane_b32 v42, s31, 3 -; GISEL64-NEXT: s_wait_alu 0xfffe -; GISEL64-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GISEL64-NEXT: flat_store_b32 v[40:41], v0 -; GISEL64-NEXT: v_readlane_b32 s31, v42, 3 -; GISEL64-NEXT: v_readlane_b32 s30, v42, 2 -; GISEL64-NEXT: v_readlane_b32 s5, v42, 1 -; GISEL64-NEXT: v_readlane_b32 s4, v42, 0 -; GISEL64-NEXT: v_readlane_b32 s0, v42, 4 -; GISEL64-NEXT: s_clause 0x2 -; GISEL64-NEXT: scratch_load_b32 v42, off, s33 -; GISEL64-NEXT: scratch_load_b32 v40, off, s33 offset:164 -; GISEL64-NEXT: scratch_load_b32 v41, off, s33 offset:168 -; GISEL64-NEXT: s_mov_b32 s32, s33 -; GISEL64-NEXT: s_xor_b64 exec, s[4:5], -1 -; GISEL64-NEXT: s_clause 0x1f -; GISEL64-NEXT: scratch_load_b32 v0, off, s33 offset:4 -; GISEL64-NEXT: scratch_load_b32 v1, off, s33 offset:8 -; GISEL64-NEXT: scratch_load_b32 v2, off, s33 offset:12 -; GISEL64-NEXT: scratch_load_b32 v3, off, s33 offset:16 -; GISEL64-NEXT: scratch_load_b32 v4, off, s33 offset:20 -; GISEL64-NEXT: scratch_load_b32 v5, off, s33 offset:24 -; GISEL64-NEXT: scratch_load_b32 v6, off, s33 offset:28 -; GISEL64-NEXT: scratch_load_b32 v7, off, s33 offset:32 -; GISEL64-NEXT: scratch_load_b32 v8, off, s33 offset:36 -; GISEL64-NEXT: scratch_load_b32 v9, off, s33 offset:40 -; GISEL64-NEXT: scratch_load_b32 v10, off, s33 offset:44 -; GISEL64-NEXT: scratch_load_b32 v11, off, s33 offset:48 -; GISEL64-NEXT: scratch_load_b32 v12, off, s33 offset:52 -; GISEL64-NEXT: scratch_load_b32 v13, off, s33 offset:56 -; GISEL64-NEXT: scratch_load_b32 v14, off, s33 offset:60 -; GISEL64-NEXT: scratch_load_b32 v15, off, s33 offset:64 -; GISEL64-NEXT: scratch_load_b32 v16, off, s33 offset:68 -; GISEL64-NEXT: scratch_load_b32 v17, off, s33 offset:72 -; GISEL64-NEXT: scratch_load_b32 v18, off, s33 offset:76 -; GISEL64-NEXT: scratch_load_b32 v19, off, s33 offset:80 -; GISEL64-NEXT: scratch_load_b32 v20, off, s33 offset:84 -; GISEL64-NEXT: scratch_load_b32 v21, off, s33 offset:88 -; GISEL64-NEXT: scratch_load_b32 v22, off, s33 offset:92 -; GISEL64-NEXT: scratch_load_b32 v23, off, s33 offset:96 -; GISEL64-NEXT: scratch_load_b32 v24, off, s33 offset:100 -; GISEL64-NEXT: scratch_load_b32 v25, off, s33 offset:104 -; GISEL64-NEXT: scratch_load_b32 v26, off, s33 offset:108 -; GISEL64-NEXT: scratch_load_b32 v27, off, s33 offset:112 -; GISEL64-NEXT: scratch_load_b32 v28, off, s33 offset:116 -; GISEL64-NEXT: scratch_load_b32 v29, off, s33 offset:120 -; GISEL64-NEXT: scratch_load_b32 v30, off, s33 offset:124 -; GISEL64-NEXT: scratch_load_b32 v31, off, s33 offset:128 -; GISEL64-NEXT: s_clause 0x1f -; GISEL64-NEXT: scratch_load_b32 v32, off, s33 offset:132 -; GISEL64-NEXT: scratch_load_b32 v33, off, s33 offset:136 -; GISEL64-NEXT: scratch_load_b32 v34, off, s33 offset:140 -; GISEL64-NEXT: scratch_load_b32 v35, off, s33 offset:144 -; GISEL64-NEXT: scratch_load_b32 v36, off, s33 offset:148 -; GISEL64-NEXT: scratch_load_b32 v37, off, s33 offset:152 -; GISEL64-NEXT: scratch_load_b32 v38, off, s33 offset:156 -; GISEL64-NEXT: scratch_load_b32 v39, off, s33 offset:160 -; GISEL64-NEXT: scratch_load_b32 v48, off, s33 offset:172 -; GISEL64-NEXT: scratch_load_b32 v49, off, s33 offset:176 -; GISEL64-NEXT: scratch_load_b32 v50, off, s33 offset:180 -; GISEL64-NEXT: scratch_load_b32 v51, off, s33 offset:184 -; GISEL64-NEXT: scratch_load_b32 v52, off, s33 offset:188 -; GISEL64-NEXT: scratch_load_b32 v53, off, s33 offset:192 -; GISEL64-NEXT: scratch_load_b32 v54, off, s33 offset:196 -; GISEL64-NEXT: scratch_load_b32 v55, off, s33 offset:200 -; GISEL64-NEXT: scratch_load_b32 v64, off, s33 offset:204 -; GISEL64-NEXT: scratch_load_b32 v65, off, s33 offset:208 -; GISEL64-NEXT: scratch_load_b32 v66, off, s33 offset:212 -; GISEL64-NEXT: scratch_load_b32 v67, off, s33 offset:216 -; GISEL64-NEXT: scratch_load_b32 v68, off, s33 offset:220 -; GISEL64-NEXT: scratch_load_b32 v69, off, s33 offset:224 -; GISEL64-NEXT: scratch_load_b32 v70, off, s33 offset:228 -; GISEL64-NEXT: scratch_load_b32 v71, off, s33 offset:232 -; GISEL64-NEXT: scratch_load_b32 v80, off, s33 offset:236 -; GISEL64-NEXT: scratch_load_b32 v81, off, s33 offset:240 -; GISEL64-NEXT: scratch_load_b32 v82, off, s33 offset:244 -; GISEL64-NEXT: scratch_load_b32 v83, off, s33 offset:248 -; GISEL64-NEXT: scratch_load_b32 v84, off, s33 offset:252 -; GISEL64-NEXT: scratch_load_b32 v85, off, s33 offset:256 -; GISEL64-NEXT: scratch_load_b32 v86, off, s33 offset:260 -; GISEL64-NEXT: scratch_load_b32 v87, off, s33 offset:264 -; GISEL64-NEXT: s_clause 0x1f -; GISEL64-NEXT: scratch_load_b32 v96, off, s33 offset:268 -; GISEL64-NEXT: scratch_load_b32 v97, off, s33 offset:272 -; GISEL64-NEXT: scratch_load_b32 v98, off, s33 offset:276 -; GISEL64-NEXT: scratch_load_b32 v99, off, s33 offset:280 -; GISEL64-NEXT: scratch_load_b32 v100, off, s33 offset:284 -; GISEL64-NEXT: scratch_load_b32 v101, off, s33 offset:288 -; GISEL64-NEXT: scratch_load_b32 v102, off, s33 offset:292 -; GISEL64-NEXT: scratch_load_b32 v103, off, s33 offset:296 -; GISEL64-NEXT: scratch_load_b32 v112, off, s33 offset:300 -; GISEL64-NEXT: scratch_load_b32 v113, off, s33 offset:304 -; GISEL64-NEXT: scratch_load_b32 v114, off, s33 offset:308 -; GISEL64-NEXT: scratch_load_b32 v115, off, s33 offset:312 -; GISEL64-NEXT: scratch_load_b32 v116, off, s33 offset:316 -; GISEL64-NEXT: scratch_load_b32 v117, off, s33 offset:320 -; GISEL64-NEXT: scratch_load_b32 v118, off, s33 offset:324 -; GISEL64-NEXT: scratch_load_b32 v119, off, s33 offset:328 -; GISEL64-NEXT: scratch_load_b32 v128, off, s33 offset:332 -; GISEL64-NEXT: scratch_load_b32 v129, off, s33 offset:336 -; GISEL64-NEXT: scratch_load_b32 v130, off, s33 offset:340 -; GISEL64-NEXT: scratch_load_b32 v131, off, s33 offset:344 -; GISEL64-NEXT: scratch_load_b32 v132, off, s33 offset:348 -; GISEL64-NEXT: scratch_load_b32 v133, off, s33 offset:352 -; GISEL64-NEXT: scratch_load_b32 v134, off, s33 offset:356 -; GISEL64-NEXT: scratch_load_b32 v135, off, s33 offset:360 -; GISEL64-NEXT: scratch_load_b32 v144, off, s33 offset:364 -; GISEL64-NEXT: scratch_load_b32 v145, off, s33 offset:368 -; GISEL64-NEXT: scratch_load_b32 v146, off, s33 offset:372 -; GISEL64-NEXT: scratch_load_b32 v147, off, s33 offset:376 -; GISEL64-NEXT: scratch_load_b32 v148, off, s33 offset:380 -; GISEL64-NEXT: scratch_load_b32 v149, off, s33 offset:384 -; GISEL64-NEXT: scratch_load_b32 v150, off, s33 offset:388 -; GISEL64-NEXT: scratch_load_b32 v151, off, s33 offset:392 -; GISEL64-NEXT: s_clause 0x1f -; GISEL64-NEXT: scratch_load_b32 v160, off, s33 offset:396 -; GISEL64-NEXT: scratch_load_b32 v161, off, s33 offset:400 -; GISEL64-NEXT: scratch_load_b32 v162, off, s33 offset:404 -; GISEL64-NEXT: scratch_load_b32 v163, off, s33 offset:408 -; GISEL64-NEXT: scratch_load_b32 v164, off, s33 offset:412 -; GISEL64-NEXT: scratch_load_b32 v165, off, s33 offset:416 -; GISEL64-NEXT: scratch_load_b32 v166, off, s33 offset:420 -; GISEL64-NEXT: scratch_load_b32 v167, off, s33 offset:424 -; GISEL64-NEXT: scratch_load_b32 v176, off, s33 offset:428 -; GISEL64-NEXT: scratch_load_b32 v177, off, s33 offset:432 -; GISEL64-NEXT: scratch_load_b32 v178, off, s33 offset:436 -; GISEL64-NEXT: scratch_load_b32 v179, off, s33 offset:440 -; GISEL64-NEXT: scratch_load_b32 v180, off, s33 offset:444 -; GISEL64-NEXT: scratch_load_b32 v181, off, s33 offset:448 -; GISEL64-NEXT: scratch_load_b32 v182, off, s33 offset:452 -; GISEL64-NEXT: scratch_load_b32 v183, off, s33 offset:456 -; GISEL64-NEXT: scratch_load_b32 v192, off, s33 offset:460 -; GISEL64-NEXT: scratch_load_b32 v193, off, s33 offset:464 -; GISEL64-NEXT: scratch_load_b32 v194, off, s33 offset:468 -; GISEL64-NEXT: scratch_load_b32 v195, off, s33 offset:472 -; GISEL64-NEXT: scratch_load_b32 v196, off, s33 offset:476 -; GISEL64-NEXT: scratch_load_b32 v197, off, s33 offset:480 -; GISEL64-NEXT: scratch_load_b32 v198, off, s33 offset:484 -; GISEL64-NEXT: scratch_load_b32 v199, off, s33 offset:488 -; GISEL64-NEXT: scratch_load_b32 v208, off, s33 offset:492 -; GISEL64-NEXT: scratch_load_b32 v209, off, s33 offset:496 -; GISEL64-NEXT: scratch_load_b32 v210, off, s33 offset:500 -; GISEL64-NEXT: scratch_load_b32 v211, off, s33 offset:504 -; GISEL64-NEXT: scratch_load_b32 v212, off, s33 offset:508 -; GISEL64-NEXT: scratch_load_b32 v213, off, s33 offset:512 -; GISEL64-NEXT: scratch_load_b32 v214, off, s33 offset:516 -; GISEL64-NEXT: scratch_load_b32 v215, off, s33 offset:520 -; GISEL64-NEXT: s_clause 0xf -; GISEL64-NEXT: scratch_load_b32 v224, off, s33 offset:524 -; GISEL64-NEXT: scratch_load_b32 v225, off, s33 offset:528 -; GISEL64-NEXT: scratch_load_b32 v226, off, s33 offset:532 -; GISEL64-NEXT: scratch_load_b32 v227, off, s33 offset:536 -; GISEL64-NEXT: scratch_load_b32 v228, off, s33 offset:540 -; GISEL64-NEXT: scratch_load_b32 v229, off, s33 offset:544 -; GISEL64-NEXT: scratch_load_b32 v230, off, s33 offset:548 -; GISEL64-NEXT: scratch_load_b32 v231, off, s33 offset:552 -; GISEL64-NEXT: scratch_load_b32 v240, off, s33 offset:556 -; GISEL64-NEXT: scratch_load_b32 v241, off, s33 offset:560 -; GISEL64-NEXT: scratch_load_b32 v242, off, s33 offset:564 -; GISEL64-NEXT: scratch_load_b32 v243, off, s33 offset:568 -; GISEL64-NEXT: scratch_load_b32 v244, off, s33 offset:572 -; GISEL64-NEXT: scratch_load_b32 v245, off, s33 offset:576 -; GISEL64-NEXT: scratch_load_b32 v246, off, s33 offset:580 -; GISEL64-NEXT: scratch_load_b32 v247, off, s33 offset:584 -; GISEL64-NEXT: s_mov_b64 exec, s[4:5] -; GISEL64-NEXT: s_mov_b32 s33, s0 -; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0 -; GISEL64-NEXT: s_wait_alu 0xfffe -; GISEL64-NEXT: s_setpc_b64 s[30:31] - %ret = call float(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, <8 x float> %x) convergent - store float %ret, ptr %p - ret void -} diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-call-whole-wave.ll b/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-call-whole-wave.ll deleted file mode 100644 index a744bf318be9a..0000000000000 --- a/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-call-whole-wave.ll +++ /dev/null @@ -1,46 +0,0 @@ -; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s - -define amdgpu_cs void @indirect(ptr %fn, i32 %x) { - ; CHECK: Indirect whole wave calls are not allowed - %whatever = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr %fn, i32 %x) - ret void -} - -declare amdgpu_gfx_whole_wave void @variadic_callee(i1 %active, i32 %x, ...) - -define amdgpu_cs void @variadic(ptr %fn, i32 %x) { - ; CHECK: Variadic whole wave calls are not allowed - %whatever = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @variadic_callee, i32 %x) - ret void -} - -declare amdgpu_gfx void @bad_cc_callee(i1 %active, i32 %x) - -define amdgpu_cs void @bad_cc(i32 %x) { - ; CHECK: Callee must have the amdgpu_gfx_whole_wave calling convention - %whatever = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @bad_cc_callee, i32 %x) - ret void -} - -declare amdgpu_gfx_whole_wave i32 @no_i1_callee(i32 %active, i32 %y, i32 %z) - -define amdgpu_cs void @no_i1(i32 %x) { - ; CHECK: Callee must have i1 as its first argument - %whatever = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @no_i1_callee, i32 %x, i32 0) - ret void -} - -declare amdgpu_gfx_whole_wave i32 @good_callee(i1 %active, i32 %x, i32 inreg %y) - -define amdgpu_cs void @bad_args(i32 %x) { - ; CHECK: Call argument count must match callee argument count - %whatever.0 = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x) - - ; CHECK: Argument types must match - %whatever.1 = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x, i64 inreg 0) - - ; CHECK: Argument inreg attributes must match - %whatever.2 = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x, i32 0) - - ret void -}