diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 41ff54f0781a2..5affb9310d7f6 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1616,6 +1616,11 @@ class TargetTransformInfo {
       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
 
+  /// \return The cost of the vp intrinsic vp.load.ff.
+  LLVM_ABI InstructionCost getFirstFaultLoadCost(
+      Type *DataTy, Align Alignment,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+
   /// A helper function to determine the type of reduction algorithm used
   /// for a given \p Opcode and set of FastMathFlags \p FMF.
   static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 566e1cf51631a..bb299becfdcba 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -885,6 +885,12 @@ class TargetTransformInfoImplBase {
     return 1;
   }
 
+  virtual InstructionCost
+  getFirstFaultLoadCost(Type *DataTy, Align Alignment,
+                        TTI::TargetCostKind CostKind) const {
+    return InstructionCost::getInvalid();
+  }
+
   virtual InstructionCost
   getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                         TTI::TargetCostKind CostKind) const {
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index dce423fc1b18b..34a61b03c6e38 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1795,6 +1795,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
       }
     }
 
+    if (ICA.getID() == Intrinsic::vp_load_ff) {
+      Type *RetTy = ICA.getReturnType();
+      Type *DataTy = cast<StructType>(RetTy)->getElementType(0);
+      Align Alignment;
+      if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
+        Alignment = VPI->getPointerAlignment().valueOrOne();
+      return thisT()->getFirstFaultLoadCost(DataTy, Alignment, CostKind);
+    }
     if (ICA.getID() == Intrinsic::vp_scatter) {
       if (ICA.isTypeBasedOnly()) {
         IntrinsicCostAttributes MaskedScatter(
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 09b50c5270e57..c356075a08642 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1217,6 +1217,15 @@ InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
   return Cost;
 }
 
+InstructionCost
+TargetTransformInfo::getFirstFaultLoadCost(Type *DataTy, Align Alignment,
+                                           TTI::TargetCostKind CostKind) const {
+  InstructionCost Cost =
+      TTIImpl->getFirstFaultLoadCost(DataTy, Alignment, CostKind);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
 InstructionCost
 TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                            TTI::TargetCostKind CostKind) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8070a512ab078..457b6009428f6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -24788,6 +24788,22 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
   return true;
 }
 
+bool RISCVTargetLowering::isLegalFirstFaultLoad(EVT DataType,
+                                                Align Alignment) const {
+  if (!Subtarget.hasVInstructions())
+    return false;
+
+  EVT ScalarType = DataType.getScalarType();
+  if (!isLegalElementTypeForRVV(ScalarType))
+    return false;
+
+  if (!Subtarget.enableUnalignedVectorMem() &&
+      Alignment < ScalarType.getStoreSize())
+    return false;
+
+  return true;
+}
+
 MachineInstr *
 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
                                    MachineBasicBlock::instr_iterator &MBBI,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3f81ed74c12ed..f9ed914ee84ff 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -425,6 +425,10 @@ class RISCVTargetLowering : public TargetLowering {
   /// alignment is legal.
   bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;
 
+  /// Return true if a fault-only-first load of the given result type and
+  /// alignment is legal.
+  bool isLegalFirstFaultLoad(EVT DataType, Align Alignment) const;
+
   unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
 
   bool fallBackToDAGISel(const Instruction &Inst) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index a06faa414a2ef..c1068ae133a15 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1069,6 +1069,17 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
   return MemCost + ShuffleCost;
 }
 
+InstructionCost
+RISCVTTIImpl::getFirstFaultLoadCost(Type *DataTy, Align Alignment,
+                                    TTI::TargetCostKind CostKind) const {
+  EVT DataTypeVT = TLI->getValueType(DL, DataTy);
+  if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
+    return BaseT::getFirstFaultLoadCost(DataTy, Alignment, CostKind);
+
+  return getMemoryOpCost(Instruction::Load, DataTy, Alignment, 0, CostKind,
+                         {TTI::OK_AnyValue, TTI::OP_None}, nullptr);
+}
+
 InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
     unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
     Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 47e0a250d285a..377249dfa2954 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -188,6 +188,10 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
                   Align Alignment, unsigned AddressSpace,
                   TTI::TargetCostKind CostKind, bool UseMaskForCond = false,
                   bool UseMaskForGaps = false) const override;
+  InstructionCost
+  getFirstFaultLoadCost(Type *DataTy, Align Alignment,
+                        TTI::TargetCostKind CostKind) const override;
+
   InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                          const Value *Ptr, bool VariableMask,
                                          Align Alignment,
diff --git a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
index 71746caf35f2e..ba792d8f0955b 100644
--- a/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/vp-intrinsics.ll
@@ -836,74 +836,74 @@ define void @abs() {
   ret void
 }
 
-define void @load() {
+define void @load(ptr %src) {
 ; CHECK-LABEL: 'load'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = load <2 x i8>, ptr undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = load <4 x i8>, ptr undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = load <8 x i8>, ptr undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = load <16 x i8>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = load <2 x i64>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t12 = load <4 x i64>, ptr undef, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t14 = load <8 x i64>, ptr undef, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t16 = load <16 x i64>, ptr undef, align 128
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = load <vscale x 2 x i8>, ptr undef, align 2
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = load <vscale x 4 x i8>, ptr undef, align 4
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t22 = load <vscale x 8 x i8>, ptr undef, align 8
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = load <vscale x 16 x i8>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t26 = load <vscale x 2 x i64>, ptr undef, align 16
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t28 = load <vscale x 4 x i64>, ptr undef, align 32
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t30 = load <vscale x 8 x i64>, ptr undef, align 64
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t32 = load <vscale x 16 x i64>, ptr undef, align 128
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t0 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t2 = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t4 = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t6 = call <16 x i8> @llvm.vp.load.v16i8.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t8 = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t10 = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t12 = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t14 = call <16 x i64> @llvm.vp.load.v16i64.p0(ptr %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64.p0(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64.p0(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64.p0(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr undef, <2 x i1> undef, i32 undef)
-  %t1 = load <2 x i8>, ptr undef
-  %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr undef, <4 x i1> undef, i32 undef)
-  %t3 = load <4 x i8>, ptr undef
-  %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr undef, <8 x i1> undef, i32 undef)
-  %t5 = load <8 x i8>, ptr undef
-  %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr undef, <16 x i1> undef, i32 undef)
-  %t7 = load <16 x i8>, ptr undef
-  %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr undef, <2 x i1> undef, i32 undef)
-  %t9 = load <2 x i64>, ptr undef
-  %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr undef, <4 x i1> undef, i32 undef)
-  %t12 = load <4 x i64>, ptr undef
-  %t13 = call <8 x i64> @llvm.vp.load.v8i64(ptr undef, <8 x i1> undef, i32 undef)
-  %t14 = load <8 x i64>, ptr undef
-  %t15 = call <16 x i64> @llvm.vp.load.v16i64(ptr undef, <16 x i1> undef, i32 undef)
-  %t16 = load <16 x i64>, ptr undef
-  %t17 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-  %t18 = load <vscale x 2 x i8>, ptr undef
-  %t19 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-  %t20 = load <vscale x 4 x i8>, ptr undef
-  %t21 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-  %t22 = load <vscale x 8 x i8>, ptr undef
-  %t23 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-  %t24 = load <vscale x 16 x i8>, ptr undef
-  %t25 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr undef, <vscale x 2 x i1> undef, i32 undef)
-  %t26 = load <vscale x 2 x i64>, ptr undef
-  %t27 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr undef, <vscale x 4 x i1> undef, i32 undef)
-  %t28 = load <vscale x 4 x i64>, ptr undef
-  %t29 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr undef, <vscale x 8 x i1> undef, i32 undef)
-  %t30 = load <vscale x 8 x i64>, ptr undef
-  %t31 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr undef, <vscale x 16 x i1> undef, i32 undef)
-  %t32 = load <vscale x 16 x i64>, ptr undef
+  %t0 = call <2 x i8> @llvm.vp.load.v2i8(ptr %src, <2 x i1> undef, i32 undef)
+  %t1 = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr align 1 %src, <2 x i1> undef, i32 undef)
+  %t2 = call <4 x i8> @llvm.vp.load.v4i8(ptr %src, <4 x i1> undef, i32 undef)
+  %t3 = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr align 1 %src, <4 x i1> undef, i32 undef)
+  %t4 = call <8 x i8> @llvm.vp.load.v8i8(ptr %src, <8 x i1> undef, i32 undef)
+  %t5 = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr align 1 %src, <8 x i1> undef, i32 undef)
+  %t6 = call <16 x i8> @llvm.vp.load.v16i8(ptr %src, <16 x i1> undef, i32 undef)
+  %t7 = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr align 1 %src, <16 x i1> undef, i32 undef)
+  %t8 = call <2 x i64> @llvm.vp.load.v2i64(ptr %src, <2 x i1> undef, i32 undef)
+  %t9 = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr align 8 %src, <2 x i1> undef, i32 undef)
+  %t10 = call <4 x i64> @llvm.vp.load.v4i64(ptr %src, <4 x i1> undef, i32 undef)
+  %t11 = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr align 8 %src, <4 x i1> undef, i32 undef)
+  %t12 = call <8 x i64> @llvm.vp.load.v8i64(ptr %src, <8 x i1> undef, i32 undef)
+  %t13 = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr align 8 %src, <8 x i1> undef, i32 undef)
+  %t14 = call <16 x i64> @llvm.vp.load.v16i64(ptr %src, <16 x i1> undef, i32 undef)
+  %t15 = call { <16 x i64>, i32 } @llvm.vp.load.ff.v16i64.p0(ptr align 8 %src, <16 x i1> undef, i32 undef)
+  %t16 = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+  %t17 = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr align 1 %src, <vscale x 2 x i1> undef, i32 undef)
+  %t18 = call <vscale x 4 x i8> @llvm.vp.load.nxv4i8(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+  %t19 = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr align 1 %src, <vscale x 4 x i1> undef, i32 undef)
+  %t20 = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+  %t21 = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr align 1 %src, <vscale x 8 x i1> undef, i32 undef)
+  %t22 = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+  %t23 = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr align 1 %src, <vscale x 16 x i1> undef, i32 undef)
+  %t24 = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64(ptr %src, <vscale x 2 x i1> undef, i32 undef)
+  %t25 = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr align 8 %src, <vscale x 2 x i1> undef, i32 undef)
+  %t26 = call <vscale x 4 x i64> @llvm.vp.load.nxv4i64(ptr %src, <vscale x 4 x i1> undef, i32 undef)
+  %t27 = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr align 8 %src, <vscale x 4 x i1> undef, i32 undef)
+  %t28 = call <vscale x 8 x i64> @llvm.vp.load.nxv8i64(ptr %src, <vscale x 8 x i1> undef, i32 undef)
+  %t29 = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr align 8 %src, <vscale x 8 x i1> undef, i32 undef)
+  %t30 = call <vscale x 16 x i64> @llvm.vp.load.nxv16i64(ptr %src, <vscale x 16 x i1> undef, i32 undef)
+  %t31 = call { <vscale x 16 x i64>, i32 } @llvm.vp.load.ff.nxv16i64.p0(ptr align 8 %src, <vscale x 16 x i1> undef, i32 undef)
   ret void
 }
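Note for reviewers: the sketch below is not part of the patch; it illustrates how a cost-model client (for example, a transform choosing between an ordinary vp.load and a fault-only-first vp.load.ff) might consume the new hook. The helper name and the profitability rule are hypothetical; only getFirstFaultLoadCost and TCK_RecipThroughput come from this change.

// Illustrative sketch only -- not part of this patch. The base implementation
// returns InstructionCost::getInvalid() when the target has no legal
// fault-only-first load for (VecTy, Alignment), so callers must check
// isValid() before comparing against other costs.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/InstructionCost.h"

using namespace llvm;

// Hypothetical helper: is a fault-only-first load of VecTy no more expensive
// than the plain contiguous load cost the caller has already computed?
static bool isFaultOnlyFirstLoadProfitable(const TargetTransformInfo &TTI,
                                           Type *VecTy, Align Alignment,
                                           InstructionCost PlainLoadCost) {
  InstructionCost FFCost = TTI.getFirstFaultLoadCost(
      VecTy, Alignment, TargetTransformInfo::TCK_RecipThroughput);
  return FFCost.isValid() && FFCost <= PlainLoadCost;
}

On RISC-V with V, the patch prices a legal vp.load.ff the same as the corresponding unit-stride load (see the paired CHECK lines above), so a check like this reduces to the isLegalFirstFaultLoad legality test.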