diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 73dce230575d8..a1684b87722cb 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -469,12 +469,8 @@ static Register buildLoadInst(SPIRVType *BaseType, Register PtrRegister, MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR, LLT LowLevelType, Register DestinationReg = Register(0)) { - MachineRegisterInfo *MRI = MIRBuilder.getMRI(); - if (!DestinationReg.isValid()) { - DestinationReg = MRI->createVirtualRegister(&SPIRV::iIDRegClass); - MRI->setType(DestinationReg, LLT::scalar(64)); - GR->assignSPIRVTypeToVReg(BaseType, DestinationReg, MIRBuilder.getMF()); - } + if (!DestinationReg.isValid()) + DestinationReg = createVirtualRegister(BaseType, GR, MIRBuilder); // TODO: consider using correct address space and alignment (p0 is canonical // type for selection though). MachinePointerInfo PtrInfo = MachinePointerInfo(); @@ -2151,7 +2147,7 @@ static bool buildEnqueueKernel(const SPIRV::IncomingCall *Call, const SPIRVType *PointerSizeTy = GR->getOrCreateSPIRVPointerType( Int32Ty, MIRBuilder, SPIRV::StorageClass::Function); for (unsigned I = 0; I < LocalSizeNum; ++I) { - Register Reg = MRI->createVirtualRegister(&SPIRV::iIDRegClass); + Register Reg = MRI->createVirtualRegister(&SPIRV::pIDRegClass); MRI->setType(Reg, LLType); GR->assignSPIRVTypeToVReg(PointerSizeTy, Reg, MIRBuilder.getMF()); auto GEPInst = MIRBuilder.buildIntrinsic( @@ -2539,23 +2535,11 @@ std::optional lowerBuiltin(const StringRef DemangledCall, SPIRVGlobalRegistry *GR) { LLVM_DEBUG(dbgs() << "Lowering builtin call: " << DemangledCall << "\n"); - // SPIR-V type and return register. 
- Register ReturnRegister = OrigRet; - SPIRVType *ReturnType = nullptr; - if (OrigRetTy && !OrigRetTy->isVoidTy()) { - ReturnType = GR->assignTypeToVReg(OrigRetTy, ReturnRegister, MIRBuilder); - if (!MIRBuilder.getMRI()->getRegClassOrNull(ReturnRegister)) - MIRBuilder.getMRI()->setRegClass(ReturnRegister, - GR->getRegClass(ReturnType)); - } else if (OrigRetTy && OrigRetTy->isVoidTy()) { - ReturnRegister = MIRBuilder.getMRI()->createVirtualRegister(&IDRegClass); - MIRBuilder.getMRI()->setType(ReturnRegister, LLT::scalar(64)); - ReturnType = GR->assignTypeToVReg(OrigRetTy, ReturnRegister, MIRBuilder); - } - // Lookup the builtin in the TableGen records. + SPIRVType *SpvType = GR->getSPIRVTypeForVReg(OrigRet); + assert(SpvType && "Inconsistent return register: expected valid type info"); std::unique_ptr Call = - lookupBuiltin(DemangledCall, Set, ReturnRegister, ReturnType, Args); + lookupBuiltin(DemangledCall, Set, OrigRet, SpvType, Args); if (!Call) { LLVM_DEBUG(dbgs() << "Builtin record was not found!\n"); diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 3c5397319aaf2..3fdaa6aa3257e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -539,6 +539,23 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (isFunctionDecl && !DemangledName.empty() && (canUseGLSL || canUseOpenCL)) { + if (ResVReg.isValid()) { + if (!GR->getSPIRVTypeForVReg(ResVReg)) { + const Type *RetTy = OrigRetTy; + if (auto *PtrRetTy = dyn_cast(OrigRetTy)) { + const Value *OrigValue = Info.OrigRet.OrigValue; + if (!OrigValue) + OrigValue = Info.CB; + if (OrigValue) + if (Type *ElemTy = GR->findDeducedElementType(OrigValue)) + RetTy = + TypedPointerType::get(ElemTy, PtrRetTy->getAddressSpace()); + } + setRegClassType(ResVReg, RetTy, GR, MIRBuilder); + } + } else { + ResVReg = createVirtualRegister(OrigRetTy, GR, MIRBuilder); + } SmallVector ArgVRegs; for (auto Arg : 
Info.OrigArgs) { assert(Arg.Regs.size() == 1 && "Call arg has multiple VRegs"); diff --git a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp index b82c2538a8136..48df845efd76b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVDuplicatesTracker.cpp @@ -69,16 +69,31 @@ void SPIRVGeneralDuplicatesTracker::buildDepsGraph( MachineOperand *RegOp = &VRegDef->getOperand(0); if (Reg2Entry.count(RegOp) == 0 && (MI->getOpcode() != SPIRV::OpVariable || i != 3)) { - std::string DiagMsg; - raw_string_ostream OS(DiagMsg); - OS << "Unexpected pattern while building a dependency " - "graph.\nInstruction: "; - MI->print(OS); - OS << "Operand: "; - Op.print(OS); - OS << "\nOperand definition: "; - VRegDef->print(OS); - report_fatal_error(DiagMsg.c_str()); + // try to repair the unexpected code pattern + bool IsFixed = false; + if (VRegDef->getOpcode() == TargetOpcode::G_CONSTANT && + RegOp->isReg() && MRI.getType(RegOp->getReg()).isScalar()) { + const Constant *C = VRegDef->getOperand(1).getCImm(); + add(C, MI->getParent()->getParent(), RegOp->getReg()); + auto Iter = CT.Storage.find(C); + if (Iter != CT.Storage.end()) { + SPIRV::DTSortableEntry &MissedEntry = Iter->second; + Reg2Entry[RegOp] = &MissedEntry; + IsFixed = true; + } + } + if (!IsFixed) { + std::string DiagMsg; + raw_string_ostream OS(DiagMsg); + OS << "Unexpected pattern while building a dependency " + "graph.\nInstruction: "; + MI->print(OS); + OS << "Operand: "; + Op.print(OS); + OS << "\nOperand definition: "; + VRegDef->print(OS); + report_fatal_error(DiagMsg.c_str()); + } } if (Reg2Entry.count(RegOp)) E->addDep(Reg2Entry[RegOp]); diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index e6ef40e010dc2..e6f136cc81b4b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -17,6 +17,7 @@ #include 
"SPIRVSubtarget.h" #include "SPIRVTargetMachine.h" #include "SPIRVUtils.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstVisitor.h" @@ -67,7 +68,7 @@ class SPIRVEmitIntrinsics public InstVisitor { SPIRVTargetMachine *TM = nullptr; SPIRVGlobalRegistry *GR = nullptr; - Function *F = nullptr; + Function *CurrF = nullptr; bool TrackConstants = true; bool HaveFunPtrs = false; DenseMap AggrConsts; @@ -76,8 +77,33 @@ class SPIRVEmitIntrinsics SPIRV::InstructionSet::InstructionSet InstrSet; // a register of Instructions that don't have a complete type definition - DenseMap UncompleteTypeInfo; - SmallVector PostprocessWorklist; + bool CanTodoType = true; + unsigned TodoTypeSz = 0; + DenseMap TodoType; + void insertTodoType(Value *Op) { + // TODO: add isa(Op) to no-insert + if (CanTodoType && !isa(Op)) { + auto It = TodoType.try_emplace(Op, true); + if (It.second) + ++TodoTypeSz; + } + } + void eraseTodoType(Value *Op) { + auto It = TodoType.find(Op); + if (It != TodoType.end() && It->second) { + TodoType[Op] = false; + --TodoTypeSz; + } + } + bool isTodoType(Value *Op) { + if (isa(Op)) + return false; + auto It = TodoType.find(Op); + return It != TodoType.end() && It->second; + } + // a register of Instructions that were visited by deduceOperandElementType() + // to validate operand types with an instruction + std::unordered_set TypeValidated; // well known result types of builtins enum WellKnownTypes { Event }; @@ -86,7 +112,8 @@ class SPIRVEmitIntrinsics Type *deduceElementType(Value *I, bool UnknownElemTypeI8); Type *deduceElementTypeHelper(Value *I, bool UnknownElemTypeI8); Type *deduceElementTypeHelper(Value *I, std::unordered_set &Visited, - bool UnknownElemTypeI8); + bool UnknownElemTypeI8, + bool IgnoreKnownType = false); Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand, bool UnknownElemTypeI8); Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand, @@ -105,8 +132,10 
@@ class SPIRVEmitIntrinsics bool UnknownElemTypeI8); // deduce Types of operands of the Instruction if possible - void deduceOperandElementType(Instruction *I, Instruction *AskOp = 0, - Type *AskTy = 0, CallInst *AssignCI = 0); + void deduceOperandElementType(Instruction *I, + SmallPtrSet *UncompleteRets, + const SmallPtrSet *AskOps = nullptr, + bool IsPostprocessing = false); void preprocessCompositeConstants(IRBuilder<> &B); void preprocessUndefs(IRBuilder<> &B); @@ -122,6 +151,9 @@ class SPIRVEmitIntrinsics return B.CreateIntrinsic(IntrID, {Types}, Args); } + Type *reconstructType(Value *Op, bool UnknownElemTypeI8, + bool IsPostprocessing); + void buildAssignType(IRBuilder<> &B, Type *ElemTy, Value *Arg); void buildAssignPtr(IRBuilder<> &B, Type *ElemTy, Value *Arg); void updateAssignType(CallInst *AssignCI, Value *Arg, Value *OfType); @@ -145,12 +177,31 @@ class SPIRVEmitIntrinsics Type *deduceFunParamElementType(Function *F, unsigned OpIdx); Type *deduceFunParamElementType(Function *F, unsigned OpIdx, std::unordered_set &FVisited); - void replaceWithPtrcasted(Instruction *CI, Type *NewElemTy, Type *KnownElemTy, - CallInst *AssignCI); + + bool deduceOperandElementTypeCalledFunction( + CallInst *CI, SmallVector> &Ops, + Type *&KnownElemTy); + void deduceOperandElementTypeFunctionPointer( + CallInst *CI, SmallVector> &Ops, + Type *&KnownElemTy, bool IsPostprocessing); + + CallInst *buildSpvPtrcast(Function *F, Value *Op, Type *ElemTy); + void replaceUsesOfWithSpvPtrcast(Value *Op, Type *ElemTy, Instruction *I, + DenseMap Ptrcasts); + void propagateElemType(Value *Op, Type *ElemTy, + DenseSet> &VisitedSubst); + void + propagateElemTypeRec(Value *Op, Type *PtrElemTy, Type *CastElemTy, + DenseSet> &VisitedSubst); + void propagateElemTypeRec(Value *Op, Type *PtrElemTy, Type *CastElemTy, + DenseSet> &VisitedSubst, + std::unordered_set &Visited, + DenseMap Ptrcasts); + void replaceAllUsesWith(Value *Src, Value *Dest, bool DeleteOld = true); bool runOnFunction(Function 
&F); - bool postprocessTypes(); + bool postprocessTypes(Module &M); bool processFunctionPointers(Module &M); public: @@ -203,10 +254,8 @@ bool expectIgnoredInIRTranslation(const Instruction *I) { } bool allowEmitFakeUse(const Value *Arg) { - if (const auto *II = dyn_cast(Arg)) - if (Function *F = II->getCalledFunction()) - if (F->getName().starts_with("llvm.spv.")) - return false; + if (isSpvIntrinsic(Arg)) + return false; if (dyn_cast(Arg) || dyn_cast(Arg) || dyn_cast(Arg)) return false; @@ -280,17 +329,10 @@ void SPIRVEmitIntrinsics::replaceAllUsesWith(Value *Src, Value *Dest, GR->updateIfExistDeducedElementType(Src, Dest, DeleteOld); GR->updateIfExistAssignPtrTypeInstr(Src, Dest, DeleteOld); // Update uncomplete type records if any - auto It = UncompleteTypeInfo.find(Src); - if (It == UncompleteTypeInfo.end()) - return; - if (DeleteOld) { - unsigned Pos = It->second; - UncompleteTypeInfo.erase(Src); - UncompleteTypeInfo[Dest] = Pos; - PostprocessWorklist[Pos] = Dest; - } else { - UncompleteTypeInfo[Dest] = PostprocessWorklist.size(); - PostprocessWorklist.push_back(Dest); + if (isTodoType(Src)) { + if (DeleteOld) + eraseTodoType(Src); + insertTodoType(Dest); } } @@ -314,8 +356,11 @@ static inline Type *restoreMutatedType(SPIRVGlobalRegistry *GR, Instruction *I, // Reconstruct type with nested element types according to deduced type info. // Return nullptr if no detailed type info is available. 
-static inline Type *reconstructType(SPIRVGlobalRegistry *GR, Value *Op) { +Type *SPIRVEmitIntrinsics::reconstructType(Value *Op, bool UnknownElemTypeI8, + bool IsPostprocessing) { Type *Ty = Op->getType(); + if (auto *OpI = dyn_cast(Op)) + Ty = restoreMutatedType(GR, OpI, Ty); if (!isUntypedPointerTy(Ty)) return Ty; // try to find the pointee type @@ -323,10 +368,17 @@ static inline Type *reconstructType(SPIRVGlobalRegistry *GR, Value *Op) { return getTypedPointerWrapper(NestedTy, getPointerAddressSpace(Ty)); // not a pointer according to the type info (e.g., Event object) CallInst *CI = GR->findAssignPtrTypeInstr(Op); - if (!CI) - return nullptr; - MetadataAsValue *MD = cast(CI->getArgOperand(1)); - return cast(MD->getMetadata())->getType(); + if (CI) { + MetadataAsValue *MD = cast(CI->getArgOperand(1)); + return cast(MD->getMetadata())->getType(); + } + if (UnknownElemTypeI8) { + if (!IsPostprocessing) + insertTodoType(Op); + return getTypedPointerWrapper(IntegerType::getInt8Ty(Op->getContext()), + getPointerAddressSpace(Ty)); + } + return nullptr; } void SPIRVEmitIntrinsics::buildAssignType(IRBuilder<> &B, Type *Ty, @@ -354,7 +406,7 @@ void SPIRVEmitIntrinsics::buildAssignPtr(IRBuilder<> &B, Type *ElemTy, Value *OfType = PoisonValue::get(ElemTy); CallInst *AssignPtrTyCI = GR->findAssignPtrTypeInstr(Arg); if (AssignPtrTyCI == nullptr || - AssignPtrTyCI->getParent()->getParent() != F) { + AssignPtrTyCI->getParent()->getParent() != CurrF) { AssignPtrTyCI = buildIntrWithMD( Intrinsic::spv_assign_ptr_type, {Arg->getType()}, OfType, Arg, {B.getInt32(getPointerAddressSpace(Arg->getType()))}, B); @@ -379,8 +431,97 @@ void SPIRVEmitIntrinsics::updateAssignType(CallInst *AssignCI, Value *Arg, GR->addDeducedElementType(Arg, ElemTy); } +CallInst *SPIRVEmitIntrinsics::buildSpvPtrcast(Function *F, Value *Op, + Type *ElemTy) { + IRBuilder<> B(Op->getContext()); + if (auto *OpI = dyn_cast(Op)) { + // spv_ptrcast's argument Op denotes an instruction that generates + // a value, 
and we may use getInsertionPointAfterDef() + setInsertPointAfterDef(B, OpI); + } else if (auto *OpA = dyn_cast(Op)) { + B.SetInsertPointPastAllocas(OpA->getParent()); + B.SetCurrentDebugLocation(DebugLoc()); + } else { + B.SetInsertPoint(F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); + } + Type *OpTy = Op->getType(); + SmallVector Types = {OpTy, OpTy}; + SmallVector Args = {Op, buildMD(PoisonValue::get(ElemTy)), + B.getInt32(getPointerAddressSpace(OpTy))}; + CallInst *PtrCasted = + B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args); + buildAssignPtr(B, ElemTy, PtrCasted); + return PtrCasted; +} + +void SPIRVEmitIntrinsics::replaceUsesOfWithSpvPtrcast( + Value *Op, Type *ElemTy, Instruction *I, + DenseMap Ptrcasts) { + Function *F = I->getParent()->getParent(); + CallInst *PtrCastedI = nullptr; + auto It = Ptrcasts.find(F); + if (It == Ptrcasts.end()) { + PtrCastedI = buildSpvPtrcast(F, Op, ElemTy); + Ptrcasts[F] = PtrCastedI; + } else { + PtrCastedI = It->second; + } + I->replaceUsesOfWith(Op, PtrCastedI); +} + +void SPIRVEmitIntrinsics::propagateElemType( + Value *Op, Type *ElemTy, + DenseSet> &VisitedSubst) { + DenseMap Ptrcasts; + SmallVector Users(Op->users()); + for (auto *U : Users) { + if (!isa(U) || isa(U) || isSpvIntrinsic(U)) + continue; + if (!VisitedSubst.insert(std::make_pair(U, Op)).second) + continue; + Instruction *UI = dyn_cast(U); + // If the instruction was validated already, we need to keep it valid by + // keeping current Op type. 
+ if (isa(UI) || + TypeValidated.find(UI) != TypeValidated.end()) + replaceUsesOfWithSpvPtrcast(Op, ElemTy, UI, Ptrcasts); + } +} + +void SPIRVEmitIntrinsics::propagateElemTypeRec( + Value *Op, Type *PtrElemTy, Type *CastElemTy, + DenseSet> &VisitedSubst) { + std::unordered_set Visited; + DenseMap Ptrcasts; + propagateElemTypeRec(Op, PtrElemTy, CastElemTy, VisitedSubst, Visited, + Ptrcasts); +} + +void SPIRVEmitIntrinsics::propagateElemTypeRec( + Value *Op, Type *PtrElemTy, Type *CastElemTy, + DenseSet> &VisitedSubst, + std::unordered_set &Visited, + DenseMap Ptrcasts) { + if (!Visited.insert(Op).second) + return; + SmallVector Users(Op->users()); + for (auto *U : Users) { + if (!isa(U) || isa(U) || isSpvIntrinsic(U)) + continue; + if (!VisitedSubst.insert(std::make_pair(U, Op)).second) + continue; + Instruction *UI = dyn_cast(U); + // If the instruction was validated already, we need to keep it valid by + // keeping current Op type. + if (isa(UI) || + TypeValidated.find(UI) != TypeValidated.end()) + replaceUsesOfWithSpvPtrcast(Op, CastElemTy, UI, Ptrcasts); + } +} + // Set element pointer type to the given value of ValueTy and tries to // specify this type further (recursively) by Operand value, if needed. 
+ Type * SPIRVEmitIntrinsics::deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand, bool UnknownElemTypeI8) { @@ -455,23 +596,22 @@ void SPIRVEmitIntrinsics::maybeAssignPtrType(Type *&Ty, Value *Op, Type *RefTy, if (isUntypedPointerTy(RefTy)) { if (!UnknownElemTypeI8) return; - if (auto *I = dyn_cast(Op)) { - UncompleteTypeInfo[I] = PostprocessWorklist.size(); - PostprocessWorklist.push_back(I); - } + insertTodoType(Op); } Ty = RefTy; } Type *SPIRVEmitIntrinsics::deduceElementTypeHelper( - Value *I, std::unordered_set &Visited, bool UnknownElemTypeI8) { + Value *I, std::unordered_set &Visited, bool UnknownElemTypeI8, + bool IgnoreKnownType) { // allow to pass nullptr as an argument if (!I) return nullptr; // maybe already known - if (Type *KnownTy = GR->findDeducedElementType(I)) - return KnownTy; + if (!IgnoreKnownType) + if (Type *KnownTy = GR->findDeducedElementType(I)) + return KnownTy; // maybe a cycle if (!Visited.insert(I).second) @@ -483,7 +623,22 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper( if (auto *Ref = dyn_cast(I)) { maybeAssignPtrType(Ty, I, Ref->getAllocatedType(), UnknownElemTypeI8); } else if (auto *Ref = dyn_cast(I)) { - Ty = Ref->getResultElementType(); + // TODO: not sure if GetElementPtrInst::getTypeAtIndex() does anything + // useful here + if (isNestedPointer(Ref->getSourceElementType())) { + Ty = Ref->getSourceElementType(); + for (Use &U : drop_begin(Ref->indices())) + Ty = GetElementPtrInst::getTypeAtIndex(Ty, U.get()); + } else { + Ty = Ref->getResultElementType(); + } + } else if (auto *Ref = dyn_cast(I)) { + Value *Op = Ref->getPointerOperand(); + Type *KnownTy = GR->findDeducedElementType(Op); + if (!KnownTy) + KnownTy = Op->getType(); + if (Type *ElemTy = getPointeeType(KnownTy)) + maybeAssignPtrType(Ty, I, ElemTy, UnknownElemTypeI8); } else if (auto *Ref = dyn_cast(I)) { Ty = deduceElementTypeByValueDeep( Ref->getValueType(), @@ -559,7 +714,7 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper( } // remember the 
found relationship - if (Ty) { + if (Ty && !IgnoreKnownType) { // specify nested types if needed, otherwise return unchanged GR->addDeducedElementType(I, Ty); } @@ -601,7 +756,7 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper( if (auto *PtrTy = dyn_cast(OpTy)) { if (Type *NestedTy = deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8)) - Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace()); + Ty = getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace()); } else { Ty = deduceNestedTypeHelper(dyn_cast(Op), OpTy, Visited, UnknownElemTypeI8); @@ -622,7 +777,7 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper( if (auto *PtrTy = dyn_cast(OpTy)) { if (Type *NestedTy = deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8)) - Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace()); + Ty = getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace()); } else { Ty = deduceNestedTypeHelper(dyn_cast(Op), OpTy, Visited, UnknownElemTypeI8); @@ -661,10 +816,7 @@ Type *SPIRVEmitIntrinsics::deduceElementType(Value *I, bool UnknownElemTypeI8) { return Ty; if (!UnknownElemTypeI8) return nullptr; - if (auto *Instr = dyn_cast(I)) { - UncompleteTypeInfo[Instr] = PostprocessWorklist.size(); - PostprocessWorklist.push_back(Instr); - } + insertTodoType(I); return IntegerType::getInt8Ty(I->getContext()); } @@ -683,10 +835,9 @@ static inline Type *getAtomicElemTy(SPIRVGlobalRegistry *GR, Instruction *I, // Try to deduce element type for a call base. Returns false if this is an // indirect function invocation, and true otherwise. 
-static bool deduceOperandElementTypeCalledFunction( - SPIRVGlobalRegistry *GR, Instruction *I, - SPIRV::InstructionSet::InstructionSet InstrSet, CallInst *CI, - SmallVector> &Ops, Type *&KnownElemTy) { +bool SPIRVEmitIntrinsics::deduceOperandElementTypeCalledFunction( + CallInst *CI, SmallVector> &Ops, + Type *&KnownElemTy) { Function *CalledF = CI->getCalledFunction(); if (!CalledF) return false; @@ -726,7 +877,7 @@ static bool deduceOperandElementTypeCalledFunction( case SPIRV::OpAtomicUMax: case SPIRV::OpAtomicSMin: case SPIRV::OpAtomicSMax: { - KnownElemTy = getAtomicElemTy(GR, I, Op); + KnownElemTy = getAtomicElemTy(GR, CI, Op); if (!KnownElemTy) return true; Ops.push_back(std::make_pair(Op, 0)); @@ -738,32 +889,44 @@ static bool deduceOperandElementTypeCalledFunction( } // Try to deduce element type for a function pointer. -static void deduceOperandElementTypeFunctionPointer( - SPIRVGlobalRegistry *GR, Instruction *I, CallInst *CI, - SmallVector> &Ops, Type *&KnownElemTy) { +void SPIRVEmitIntrinsics::deduceOperandElementTypeFunctionPointer( + CallInst *CI, SmallVector> &Ops, + Type *&KnownElemTy, bool IsPostprocessing) { Value *Op = CI->getCalledOperand(); if (!Op || !isPointerTy(Op->getType())) return; Ops.push_back(std::make_pair(Op, std::numeric_limits::max())); FunctionType *FTy = CI->getFunctionType(); - bool IsNewFTy = false; + bool IsNewFTy = false, IsUncomplete = false; SmallVector ArgTys; for (Value *Arg : CI->args()) { Type *ArgTy = Arg->getType(); - if (ArgTy->isPointerTy()) + if (ArgTy->isPointerTy()) { if (Type *ElemTy = GR->findDeducedElementType(Arg)) { IsNewFTy = true; - ArgTy = TypedPointerType::get(ElemTy, getPointerAddressSpace(ArgTy)); + ArgTy = getTypedPointerWrapper(ElemTy, getPointerAddressSpace(ArgTy)); + if (isTodoType(Arg)) + IsUncomplete = true; + } else { + IsUncomplete = true; } + } ArgTys.push_back(ArgTy); } Type *RetTy = FTy->getReturnType(); - if (I->getType()->isPointerTy()) - if (Type *ElemTy = GR->findDeducedElementType(I)) 
{ + if (CI->getType()->isPointerTy()) { + if (Type *ElemTy = GR->findDeducedElementType(CI)) { IsNewFTy = true; RetTy = - TypedPointerType::get(ElemTy, getPointerAddressSpace(I->getType())); + getTypedPointerWrapper(ElemTy, getPointerAddressSpace(CI->getType())); + if (isTodoType(CI)) + IsUncomplete = true; + } else { + IsUncomplete = true; } + } + if (!IsPostprocessing && IsUncomplete) + insertTodoType(Op); KnownElemTy = IsNewFTy ? FunctionType::get(RetTy, ArgTys, FTy->isVarArg()) : FTy; } @@ -772,17 +935,18 @@ static void deduceOperandElementTypeFunctionPointer( // tries to deduce them. If the Instruction has Pointer operands with known // types which differ from expected, this function tries to insert a bitcast to // resolve the issue. -void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I, - Instruction *AskOp, - Type *AskTy, - CallInst *AskCI) { +void SPIRVEmitIntrinsics::deduceOperandElementType( + Instruction *I, SmallPtrSet *UncompleteRets, + const SmallPtrSet *AskOps, bool IsPostprocessing) { SmallVector> Ops; Type *KnownElemTy = nullptr; + bool Uncomplete = false; // look for known basic patterns of type inference if (auto *Ref = dyn_cast(I)) { if (!isPointerTy(I->getType()) || !(KnownElemTy = GR->findDeducedElementType(I))) return; + Uncomplete = isTodoType(I); for (unsigned i = 0; i < Ref->getNumIncomingValues(); i++) { Value *Op = Ref->getIncomingValue(i); if (isPointerTy(Op->getType())) @@ -792,14 +956,12 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I, KnownElemTy = GR->findDeducedElementType(I); if (!KnownElemTy) return; + Uncomplete = isTodoType(I); Ops.push_back(std::make_pair(Ref->getPointerOperand(), 0)); } else if (auto *Ref = dyn_cast(I)) { - KnownElemTy = Ref->getSourceElementType(); - if (isUntypedPointerTy(KnownElemTy)) - return; - Type *PointeeTy = GR->findDeducedElementType(Ref->getPointerOperand()); - if (PointeeTy && !isUntypedPointerTy(PointeeTy)) + if 
(GR->findDeducedElementType(Ref->getPointerOperand())) return; + KnownElemTy = Ref->getSourceElementType(); Ops.push_back(std::make_pair(Ref->getPointerOperand(), GetElementPtrInst::getPointerOperandIndex())); } else if (auto *Ref = dyn_cast(I)) { @@ -812,9 +974,8 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I, Ops.push_back(std::make_pair(Ref->getPointerOperand(), LoadInst::getPointerOperandIndex())); } else if (auto *Ref = dyn_cast(I)) { - if (IsKernelArgInt8(Ref->getParent()->getParent(), Ref)) - return; - if (!(KnownElemTy = reconstructType(GR, Ref->getValueOperand()))) + if (!(KnownElemTy = + reconstructType(Ref->getValueOperand(), false, IsPostprocessing))) return; Type *PointeeTy = GR->findDeducedElementType(Ref->getPointerOperand()); if (PointeeTy && !isUntypedPointerTy(PointeeTy)) @@ -837,27 +998,54 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I, if (!isPointerTy(I->getType()) || !(KnownElemTy = GR->findDeducedElementType(I))) return; + Uncomplete = isTodoType(I); for (unsigned i = 0; i < Ref->getNumOperands(); i++) { Value *Op = Ref->getOperand(i); if (isPointerTy(Op->getType())) Ops.push_back(std::make_pair(Op, i)); } } else if (auto *Ref = dyn_cast(I)) { - Type *RetTy = F->getReturnType(); + Type *RetTy = CurrF->getReturnType(); if (!isPointerTy(RetTy)) return; Value *Op = Ref->getReturnValue(); if (!Op) return; - if (!(KnownElemTy = GR->findDeducedElementType(F))) { + if (!(KnownElemTy = GR->findDeducedElementType(CurrF))) { if (Type *OpElemTy = GR->findDeducedElementType(Op)) { - GR->addDeducedElementType(F, OpElemTy); - TypedPointerType *DerivedTy = - TypedPointerType::get(OpElemTy, getPointerAddressSpace(RetTy)); - GR->addReturnType(F, DerivedTy); + GR->addDeducedElementType(CurrF, OpElemTy); + GR->addReturnType(CurrF, TypedPointerType::get( + OpElemTy, getPointerAddressSpace(RetTy))); + // non-recursive update of types in function uses + DenseSet> VisitedSubst{ + std::make_pair(I, Op)}; + for (User *U : 
CurrF->users()) { + CallInst *CI = dyn_cast(U); + if (!CI || CI->getCalledFunction() != CurrF) + continue; + if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(CI)) { + if (Type *PrevElemTy = GR->findDeducedElementType(CI)) { + updateAssignType(AssignCI, CI, PoisonValue::get(OpElemTy)); + propagateElemType(CI, PrevElemTy, VisitedSubst); + } + } + } + TypeValidated.insert(I); + // Non-recursive update of types in the function uncomplete returns. + // This may happen just once per a function, the latch is a pair of + // findDeducedElementType(F) / addDeducedElementType(F, ...). + // With or without the latch it is a non-recursive call due to + // UncompleteRets set to nullptr in this call. + if (UncompleteRets) + for (Instruction *UncompleteRetI : *UncompleteRets) + deduceOperandElementType(UncompleteRetI, nullptr, AskOps, + IsPostprocessing); + } else if (UncompleteRets) { + UncompleteRets->insert(I); } return; } + Uncomplete = isTodoType(CurrF); Ops.push_back(std::make_pair(Op, 0)); } else if (auto *Ref = dyn_cast(I)) { if (!isPointerTy(Ref->getOperand(0)->getType())) @@ -868,37 +1056,53 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I, Type *ElemTy1 = GR->findDeducedElementType(Op1); if (ElemTy0) { KnownElemTy = ElemTy0; + Uncomplete = isTodoType(Op0); Ops.push_back(std::make_pair(Op1, 1)); } else if (ElemTy1) { KnownElemTy = ElemTy1; + Uncomplete = isTodoType(Op1); Ops.push_back(std::make_pair(Op0, 0)); } } else if (CallInst *CI = dyn_cast(I)) { if (!CI->isIndirectCall()) - deduceOperandElementTypeCalledFunction(GR, I, InstrSet, CI, Ops, - KnownElemTy); + deduceOperandElementTypeCalledFunction(CI, Ops, KnownElemTy); else if (HaveFunPtrs) - deduceOperandElementTypeFunctionPointer(GR, I, CI, Ops, KnownElemTy); + deduceOperandElementTypeFunctionPointer(CI, Ops, KnownElemTy, + IsPostprocessing); } // There is no enough info to deduce types or all is valid. 
if (!KnownElemTy || Ops.size() == 0) return; - LLVMContext &Ctx = F->getContext(); + LLVMContext &Ctx = CurrF->getContext(); IRBuilder<> B(Ctx); for (auto &OpIt : Ops) { Value *Op = OpIt.first; - if (Op->use_empty() || (AskOp && Op != AskOp)) + if (Op->use_empty()) continue; - Type *Ty = AskOp ? AskTy : GR->findDeducedElementType(Op); + if (AskOps && !AskOps->contains(Op)) + continue; + Type *AskTy = nullptr; + CallInst *AskCI = nullptr; + if (IsPostprocessing && AskOps) { + AskTy = GR->findDeducedElementType(Op); + AskCI = GR->findAssignPtrTypeInstr(Op); + assert(AskTy && AskCI); + } + Type *Ty = AskTy ? AskTy : GR->findDeducedElementType(Op); if (Ty == KnownElemTy) continue; Value *OpTyVal = PoisonValue::get(KnownElemTy); Type *OpTy = Op->getType(); - if (!Ty || AskTy || isUntypedPointerTy(Ty) || - UncompleteTypeInfo.contains(Op)) { + if (!Ty || AskTy || isUntypedPointerTy(Ty) || isTodoType(Op)) { + Type *PrevElemTy = GR->findDeducedElementType(Op); GR->addDeducedElementType(Op, KnownElemTy); + // check if KnownElemTy is complete + if (!Uncomplete) + eraseTodoType(Op); + else if (!IsPostprocessing) + insertTodoType(Op); // check if there is existing Intrinsic::spv_assign_ptr_type instruction CallInst *AssignCI = AskCI ? 
AskCI : GR->findAssignPtrTypeInstr(Op); if (AssignCI == nullptr) { @@ -910,31 +1114,21 @@ void SPIRVEmitIntrinsics::deduceOperandElementType(Instruction *I, GR->addAssignPtrTypeInstr(Op, CI); } else { updateAssignType(AssignCI, Op, OpTyVal); + DenseSet> VisitedSubst{ + std::make_pair(I, Op)}; + propagateElemTypeRec(Op, KnownElemTy, PrevElemTy, VisitedSubst); } } else { - if (auto *OpI = dyn_cast(Op)) { - // spv_ptrcast's argument Op denotes an instruction that generates - // a value, and we may use getInsertionPointAfterDef() - B.SetInsertPoint(*OpI->getInsertionPointAfterDef()); - B.SetCurrentDebugLocation(OpI->getDebugLoc()); - } else if (auto *OpA = dyn_cast(Op)) { - B.SetInsertPointPastAllocas(OpA->getParent()); - B.SetCurrentDebugLocation(DebugLoc()); - } else { - B.SetInsertPoint(F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); - } - SmallVector Types = {OpTy, OpTy}; - SmallVector Args = {Op, buildMD(OpTyVal), - B.getInt32(getPointerAddressSpace(OpTy))}; + eraseTodoType(Op); CallInst *PtrCastI = - B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args); + buildSpvPtrcast(I->getParent()->getParent(), Op, KnownElemTy); if (OpIt.second == std::numeric_limits::max()) dyn_cast(I)->setCalledOperand(PtrCastI); else I->setOperand(OpIt.second, PtrCastI); - buildAssignPtr(B, KnownElemTy, PtrCastI); } } + TypeValidated.insert(I); } void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old, @@ -961,7 +1155,7 @@ void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old, void SPIRVEmitIntrinsics::preprocessUndefs(IRBuilder<> &B) { std::queue Worklist; - for (auto &I : instructions(F)) + for (auto &I : instructions(CurrF)) Worklist.push(&I); while (!Worklist.empty()) { @@ -989,7 +1183,7 @@ void SPIRVEmitIntrinsics::preprocessUndefs(IRBuilder<> &B) { void SPIRVEmitIntrinsics::preprocessCompositeConstants(IRBuilder<> &B) { std::queue Worklist; - for (auto &I : instructions(F)) + for (auto &I : instructions(CurrF)) Worklist.push(&I); while (!Worklist.empty()) { @@ 
-1048,7 +1242,7 @@ Instruction *SPIRVEmitIntrinsics::visitCallInst(CallInst &Call) { return &Call; const InlineAsm *IA = cast(Call.getCalledOperand()); - LLVMContext &Ctx = F->getContext(); + LLVMContext &Ctx = CurrF->getContext(); Constant *TyC = UndefValue::get(IA->getFunctionType()); MDString *ConstraintString = MDString::get(Ctx, IA->getConstraintString()); @@ -1141,9 +1335,9 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeTargetExt( Type *VTy = V->getType(); // A couple of sanity checks. - assert(isPointerTy(VTy) && "Expect a pointer type!"); - if (auto PType = dyn_cast(VTy)) - if (PType->getElementType() != AssignedType) + assert((isPointerTy(VTy)) && "Expect a pointer type!"); + if (Type *ElemTy = getPointeeType(VTy)) + if (ElemTy != AssignedType) report_fatal_error("Unexpected pointer element type!"); CallInst *AssignCI = GR->findAssignPtrTypeInstr(V); @@ -1174,6 +1368,7 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeTargetExt( void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast( Instruction *I, Value *Pointer, Type *ExpectedElementType, unsigned OperandToReplace, IRBuilder<> &B) { + TypeValidated.insert(I); // If Pointer is the result of nop BitCastInst (ptr -> ptr), use the source // pointer instead. The BitCastInst should be later removed when visited. while (BitCastInst *BC = dyn_cast(Pointer)) @@ -1186,7 +1381,8 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast( return; setInsertPointSkippingPhis(B, I); - MetadataAsValue *VMD = buildMD(PoisonValue::get(ExpectedElementType)); + Value *ExpectedElementVal = PoisonValue::get(ExpectedElementType); + MetadataAsValue *VMD = buildMD(ExpectedElementVal); unsigned AddressSpace = getPointerAddressSpace(Pointer->getType()); bool FirstPtrCastOrAssignPtrType = true; @@ -1223,17 +1419,30 @@ void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast( return; } - // // Do not emit spv_ptrcast if it would cast to the default pointer element - // // type (i8) of the same address space. 
- // if (ExpectedElementType->isIntegerTy(8)) - // return; - - // If this would be the first spv_ptrcast, do not emit spv_ptrcast and emit - // spv_assign_ptr_type instead. - if (FirstPtrCastOrAssignPtrType && - (isa(Pointer) || isa(Pointer))) { - buildAssignPtr(B, ExpectedElementType, Pointer); - return; + if (isa(Pointer) || isa(Pointer)) { + if (FirstPtrCastOrAssignPtrType) { + // If this would be the first spv_ptrcast, do not emit spv_ptrcast and + // emit spv_assign_ptr_type instead. + buildAssignPtr(B, ExpectedElementType, Pointer); + return; + } else if (isTodoType(Pointer)) { + eraseTodoType(Pointer); + if (!isa(Pointer) && !isa(Pointer)) { + // If this wouldn't be the first spv_ptrcast but existing type info is + // uncomplete, update spv_assign_ptr_type arguments. + if (CallInst *AssignCI = GR->findAssignPtrTypeInstr(Pointer)) { + Type *PrevElemTy = GR->findDeducedElementType(Pointer); + assert(PrevElemTy); + DenseSet> VisitedSubst{ + std::make_pair(I, Pointer)}; + updateAssignType(AssignCI, Pointer, ExpectedElementVal); + propagateElemType(Pointer, PrevElemTy, VisitedSubst); + } else { + buildAssignPtr(B, ExpectedElementType, Pointer); + } + return; + } + } } // Emit spv_ptrcast @@ -1249,27 +1458,48 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I, IRBuilder<> &B) { // Handle basic instructions: StoreInst *SI = dyn_cast(I); - if (IsKernelArgInt8(F, SI)) { - return replacePointerOperandWithPtrCast( - I, SI->getValueOperand(), IntegerType::getInt8Ty(F->getContext()), 0, - B); - } else if (SI) { + if (IsKernelArgInt8(CurrF, SI)) { + replacePointerOperandWithPtrCast( + I, SI->getValueOperand(), IntegerType::getInt8Ty(CurrF->getContext()), + 0, B); + } + if (SI) { Value *Op = SI->getValueOperand(); + Value *Pointer = SI->getPointerOperand(); Type *OpTy = Op->getType(); if (auto *OpI = dyn_cast(Op)) OpTy = restoreMutatedType(GR, OpI, OpTy); if (OpTy == Op->getType()) OpTy = deduceElementTypeByValueDeep(OpTy, Op, false); - return 
replacePointerOperandWithPtrCast(I, SI->getPointerOperand(), OpTy, 1, - B); - } else if (LoadInst *LI = dyn_cast(I)) { - return replacePointerOperandWithPtrCast(I, LI->getPointerOperand(), - LI->getType(), 0, B); - } else if (GetElementPtrInst *GEPI = dyn_cast(I)) { - return replacePointerOperandWithPtrCast(I, GEPI->getPointerOperand(), - GEPI->getSourceElementType(), 0, B); + replacePointerOperandWithPtrCast(I, Pointer, OpTy, 1, B); + return; + } + if (LoadInst *LI = dyn_cast(I)) { + Value *Pointer = LI->getPointerOperand(); + Type *OpTy = LI->getType(); + if (auto *PtrTy = dyn_cast(OpTy)) { + if (Type *ElemTy = GR->findDeducedElementType(LI)) { + OpTy = getTypedPointerWrapper(ElemTy, PtrTy->getAddressSpace()); + } else { + Type *NewOpTy = OpTy; + OpTy = deduceElementTypeByValueDeep(OpTy, LI, false); + if (OpTy == NewOpTy) + insertTodoType(Pointer); + } + } + replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B); + return; + } + if (GetElementPtrInst *GEPI = dyn_cast(I)) { + Value *Pointer = GEPI->getPointerOperand(); + Type *OpTy = GEPI->getSourceElementType(); + replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B); + if (isNestedPointer(OpTy)) + insertTodoType(Pointer); + return; } + // TODO: review and merge with existing logics: // Handle calls to builtins (non-intrinsics): CallInst *CI = dyn_cast(I); if (!CI || CI->isIndirectCall() || CI->isInlineAsm() || @@ -1287,8 +1517,8 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I, Type *ArgType = CalledArg->getType(); if (!isPointerTy(ArgType)) { CalledArgTys.push_back(nullptr); - } else if (isTypedPointerTy(ArgType)) { - CalledArgTys.push_back(cast(ArgType)->getElementType()); + } else if (Type *ArgTypeElem = getPointeeType(ArgType)) { + CalledArgTys.push_back(ArgTypeElem); HaveTypes = true; } else { Type *ElemTy = GR->findDeducedElementType(CalledArg); @@ -1338,7 +1568,8 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I, if (!ExpectedType || 
ExpectedType->isVoidTy()) continue; - if (ExpectedType->isTargetExtTy()) + if (ExpectedType->isTargetExtTy() && + !isTypedPointerWrapper(cast(ExpectedType))) insertAssignPtrTypeTargetExt(cast(ExpectedType), ArgOperand, B); else @@ -1419,7 +1650,7 @@ Instruction *SPIRVEmitIntrinsics::visitLoadInst(LoadInst &I) { TrackConstants = false; const auto *TLI = TM->getSubtargetImpl()->getTargetLowering(); MachineMemOperand::Flags Flags = - TLI->getLoadMemOperandFlags(I, F->getDataLayout()); + TLI->getLoadMemOperandFlags(I, CurrF->getDataLayout()); auto *NewI = B.CreateIntrinsic(Intrinsic::spv_load, {I.getOperand(0)->getType()}, {I.getPointerOperand(), B.getInt16(Flags), @@ -1436,7 +1667,7 @@ Instruction *SPIRVEmitIntrinsics::visitStoreInst(StoreInst &I) { TrackConstants = false; const auto *TLI = TM->getSubtargetImpl()->getTargetLowering(); MachineMemOperand::Flags Flags = - TLI->getStoreMemOperandFlags(I, F->getDataLayout()); + TLI->getStoreMemOperandFlags(I, CurrF->getDataLayout()); auto *PtrOp = I.getPointerOperand(); auto *NewI = B.CreateIntrinsic( Intrinsic::spv_store, {I.getValueOperand()->getType(), PtrOp->getType()}, @@ -1602,8 +1833,9 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I, GR->addAssignPtrTypeInstr(Op, AssignCI); } else if (!isa(Op)) { Type *OpTy = Op->getType(); - if (auto PType = dyn_cast(OpTy)) { - buildAssignPtr(B, PType->getElementType(), Op); + Type *OpTyElem = getPointeeType(OpTy); + if (OpTyElem) { + buildAssignPtr(B, OpTyElem, Op); } else if (isPointerTy(OpTy)) { Type *ElemTy = GR->findDeducedElementType(Op); buildAssignPtr(B, ElemTy ? 
ElemTy : deduceElementType(Op, true), Op); @@ -1742,9 +1974,44 @@ void SPIRVEmitIntrinsics::processParamTypesByFunHeader(Function *F, if (!isUntypedPointerTy(Arg->getType())) continue; Type *ElemTy = GR->findDeducedElementType(Arg); - if (!ElemTy && hasPointeeTypeAttr(Arg) && - (ElemTy = getPointeeTypeByAttr(Arg)) != nullptr) + if (ElemTy) + continue; + if (hasPointeeTypeAttr(Arg) && + (ElemTy = getPointeeTypeByAttr(Arg)) != nullptr) { + buildAssignPtr(B, ElemTy, Arg); + continue; + } + // search in function's call sites + for (User *U : F->users()) { + CallInst *CI = dyn_cast(U); + if (!CI || OpIdx >= CI->arg_size()) + continue; + Value *OpArg = CI->getArgOperand(OpIdx); + if (!isPointerTy(OpArg->getType())) + continue; + // maybe we already know operand's element type + if ((ElemTy = GR->findDeducedElementType(OpArg)) != nullptr) + break; + } + if (ElemTy) { buildAssignPtr(B, ElemTy, Arg); + continue; + } + if (HaveFunPtrs) { + for (User *U : Arg->users()) { + CallInst *CI = dyn_cast(U); + if (CI && !isa(CI) && CI->isIndirectCall() && + CI->getCalledOperand() == Arg && + CI->getParent()->getParent() == CurrF) { + SmallVector> Ops; + deduceOperandElementTypeFunctionPointer(CI, Ops, ElemTy, false); + if (ElemTy) { + buildAssignPtr(B, ElemTy, Arg); + break; + } + } + } + } } } @@ -1770,7 +2037,7 @@ static FunctionType *getFunctionPointerElemType(Function *F, if (ArgTy->isPointerTy()) if (Type *ElemTy = GR->findDeducedElementType(&Arg)) { IsNewFTy = true; - ArgTy = TypedPointerType::get(ElemTy, getPointerAddressSpace(ArgTy)); + ArgTy = getTypedPointerWrapper(ElemTy, getPointerAddressSpace(ArgTy)); } ArgTys.push_back(ArgTy); } @@ -1845,17 +2112,17 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { InstrSet = ST.isOpenCLEnv() ? 
SPIRV::InstructionSet::OpenCL_std : SPIRV::InstructionSet::GLSL_std_450; - if (!F) + if (!CurrF) HaveFunPtrs = ST.canUseExtension(SPIRV::Extension::SPV_INTEL_function_pointers); - F = &Func; + CurrF = &Func; IRBuilder<> B(Func.getContext()); AggrConsts.clear(); AggrConstTypes.clear(); AggrStores.clear(); - processParamTypesByFunHeader(F, B); + processParamTypesByFunHeader(CurrF, B); // StoreInst's operand type can be changed during the next transformations, // so we need to store it in the set. Also store already transformed types. @@ -1878,6 +2145,7 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { for (auto &I : instructions(Func)) Worklist.push_back(&I); + // Pass forward: use operand to deduce instructions result. for (auto &I : Worklist) { // Don't emit intrinsincs for convergence intrinsics. if (isConvergenceIntrinsic(I)) @@ -1894,8 +2162,18 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { insertAssignPtrTypeIntrs(I, B, true); } - for (auto &I : instructions(Func)) - deduceOperandElementType(&I); + // Pass backward: use instructions results to specify/update/cast operands + // where needed. + SmallPtrSet UncompleteRets; + for (auto &I : llvm::reverse(instructions(Func))) + deduceOperandElementType(&I, &UncompleteRets); + + // Pass forward for PHIs only, their operands are not preceed the instruction + // in meaning of `instructions(Func)`. 
+ for (BasicBlock &BB : Func) + for (PHINode &Phi : BB.phis()) + if (isPointerTy(Phi.getType())) + deduceOperandElementType(&Phi, nullptr); for (auto *I : Worklist) { TrackConstants = true; @@ -1917,83 +2195,83 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) { return true; } -void SPIRVEmitIntrinsics::replaceWithPtrcasted(Instruction *CI, Type *NewElemTy, - Type *KnownElemTy, - CallInst *AssignCI) { - updateAssignType(AssignCI, CI, PoisonValue::get(NewElemTy)); - IRBuilder<> B(CI->getContext()); - B.SetInsertPoint(*CI->getInsertionPointAfterDef()); - B.SetCurrentDebugLocation(CI->getDebugLoc()); - Type *OpTy = CI->getType(); - SmallVector Types = {OpTy, OpTy}; - SmallVector Args = {CI, buildMD(PoisonValue::get(KnownElemTy)), - B.getInt32(getPointerAddressSpace(OpTy))}; - CallInst *PtrCasted = - B.CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args); - SmallVector Users(CI->users()); - for (auto *U : Users) - if (U != AssignCI && U != PtrCasted) - U->replaceUsesOfWith(CI, PtrCasted); - buildAssignPtr(B, KnownElemTy, PtrCasted); -} - // Try to deduce a better type for pointers to untyped ptr. 
-bool SPIRVEmitIntrinsics::postprocessTypes() { - bool Changed = false; - if (!GR) - return Changed; - for (auto IB = PostprocessWorklist.rbegin(), IE = PostprocessWorklist.rend(); - IB != IE; ++IB) { - CallInst *AssignCI = GR->findAssignPtrTypeInstr(*IB); - Type *KnownTy = GR->findDeducedElementType(*IB); - if (!KnownTy || !AssignCI || !isa(AssignCI->getArgOperand(0))) +bool SPIRVEmitIntrinsics::postprocessTypes(Module &M) { + if (!GR || TodoTypeSz == 0) + return false; + + unsigned SzTodo = TodoTypeSz; + DenseMap> ToProcess; + for (auto [Op, Enabled] : TodoType) { + // TODO: add isa(Op) to continue + if (!Enabled || isa(Op)) + continue; + CallInst *AssignCI = GR->findAssignPtrTypeInstr(Op); + Type *KnownTy = GR->findDeducedElementType(Op); + if (!KnownTy || !AssignCI) continue; + assert(Op == AssignCI->getArgOperand(0)); // Try to improve the type deduced after all Functions are processed. - if (auto *CI = dyn_cast(*IB)) { - if (Function *CalledF = CI->getCalledFunction()) { - Type *RetElemTy = GR->findDeducedElementType(CalledF); - // Fix inconsistency between known type and function's return type. 
- if (RetElemTy && RetElemTy != KnownTy) { - replaceWithPtrcasted(CI, RetElemTy, KnownTy, AssignCI); - Changed = true; + if (auto *CI = dyn_cast(Op)) { + CurrF = CI->getParent()->getParent(); + std::unordered_set Visited; + if (Type *ElemTy = deduceElementTypeHelper(Op, Visited, false, true)) { + if (ElemTy != KnownTy) { + DenseSet> VisitedSubst; + propagateElemType(CI, ElemTy, VisitedSubst); + eraseTodoType(Op); continue; } } } - Instruction *I = cast(AssignCI->getArgOperand(0)); - for (User *U : I->users()) { + for (User *U : Op->users()) { Instruction *Inst = dyn_cast(U); - if (!Inst || isa(Inst)) + if (Inst && !isa(Inst)) + ToProcess[Inst].insert(Op); + } + } + if (TodoTypeSz == 0) + return true; + + for (auto &F : M) { + CurrF = &F; + SmallPtrSet UncompleteRets; + for (auto &I : llvm::reverse(instructions(F))) { + auto It = ToProcess.find(&I); + if (It == ToProcess.end()) continue; - deduceOperandElementType(Inst, I, KnownTy, AssignCI); - if (KnownTy != GR->findDeducedElementType(I)) { - Changed = true; - break; - } + It->second.remove_if([this](Value *V) { return !isTodoType(V); }); + if (It->second.size() == 0) + continue; + deduceOperandElementType(&I, &UncompleteRets, &It->second, true); + if (TodoTypeSz == 0) + return true; } } - return Changed; + + return SzTodo > TodoTypeSz; } bool SPIRVEmitIntrinsics::runOnModule(Module &M) { bool Changed = false; - UncompleteTypeInfo.clear(); - PostprocessWorklist.clear(); + TodoType.clear(); for (auto &F : M) Changed |= runOnFunction(F); + // Specify function parameters after all functions were processed. 
for (auto &F : M) { // check if function parameter types are set + CurrF = &F; if (!F.isDeclaration() && !F.isIntrinsic()) { - const SPIRVSubtarget &ST = TM->getSubtarget(F); - GR = ST.getSPIRVGlobalRegistry(); IRBuilder<> B(F.getContext()); processParamTypes(&F, B); } } - Changed |= postprocessTypes(); + CanTodoType = false; + Changed |= postprocessTypes(M); + if (HaveFunPtrs) Changed |= processFunctionPointers(M); diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 6f222883ee07d..9ac659f6b4f11 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -28,6 +28,18 @@ #include using namespace llvm; + +inline unsigned typeToAddressSpace(const Type *Ty) { + if (auto PType = dyn_cast(Ty)) + return PType->getAddressSpace(); + if (auto PType = dyn_cast(Ty)) + return PType->getAddressSpace(); + if (auto *ExtTy = dyn_cast(Ty); + ExtTy && isTypedPointerWrapper(ExtTy)) + return ExtTy->getIntParameter(0); + report_fatal_error("Unable to convert LLVM type to SPIRVType", true); +} + SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize) : PointerSize(PointerSize), Bound(0) {} @@ -69,7 +81,7 @@ SPIRVType *SPIRVGlobalRegistry::assignTypeToVReg( void SPIRVGlobalRegistry::assignSPIRVTypeToVReg(SPIRVType *SpirvType, Register VReg, - MachineFunction &MF) { + const MachineFunction &MF) { VRegToTypeMap[&MF][VReg] = SpirvType; } @@ -570,15 +582,15 @@ Register SPIRVGlobalRegistry::getOrCreateConstNullPtr(MachineIRBuilder &MIRBuilder, SPIRVType *SpvType) { const Type *LLVMTy = getTypeForSPIRVType(SpvType); - const TypedPointerType *LLVMPtrTy = cast(LLVMTy); + unsigned AddressSpace = typeToAddressSpace(LLVMTy); // Find a constant in DT or build a new one. 
- Constant *CP = ConstantPointerNull::get(PointerType::get( - LLVMPtrTy->getElementType(), LLVMPtrTy->getAddressSpace())); + Constant *CP = ConstantPointerNull::get( + PointerType::get(::getPointeeType(LLVMTy), AddressSpace)); Register Res = DT.find(CP, CurMF); if (!Res.isValid()) { - LLT LLTy = LLT::pointer(LLVMPtrTy->getAddressSpace(), PointerSize); + LLT LLTy = LLT::pointer(AddressSpace, PointerSize); Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy); - CurMF->getRegInfo().setRegClass(Res, &SPIRV::iIDRegClass); + CurMF->getRegInfo().setRegClass(Res, &SPIRV::pIDRegClass); assignSPIRVTypeToVReg(SpvType, Res, *CurMF); MIRBuilder.buildInstr(SPIRV::OpConstantNull) .addDef(Res) @@ -978,18 +990,11 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType( } return getOpTypeFunction(RetTy, ParamTypes, MIRBuilder); } - unsigned AddrSpace = 0xFFFF; - if (auto PType = dyn_cast(Ty)) - AddrSpace = PType->getAddressSpace(); - else if (auto PType = dyn_cast(Ty)) - AddrSpace = PType->getAddressSpace(); - else - report_fatal_error("Unable to convert LLVM type to SPIRVType", true); + unsigned AddrSpace = typeToAddressSpace(Ty); SPIRVType *SpvElementType = nullptr; - if (auto PType = dyn_cast(Ty)) - SpvElementType = getOrCreateSPIRVType(PType->getElementType(), MIRBuilder, - AccQual, EmitIR); + if (Type *ElemTy = ::getPointeeType(Ty)) + SpvElementType = getOrCreateSPIRVType(ElemTy, MIRBuilder, AccQual, EmitIR); else SpvElementType = getOrCreateSPIRVIntegerType(8, MIRBuilder); @@ -1029,7 +1034,11 @@ SPIRVType *SPIRVGlobalRegistry::restOfCreateSPIRVType( // will be added later. For special types it is already added to DT. 
if (SpirvType->getOpcode() != SPIRV::OpTypeForwardPointer && !Reg.isValid() && !isSpecialOpaqueType(Ty)) { - if (!isPointerTy(Ty)) + if (auto *ExtTy = dyn_cast(Ty); + ExtTy && isTypedPointerWrapper(ExtTy)) + DT.add(ExtTy->getTypeParameter(0), ExtTy->getIntParameter(0), + &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType)); + else if (!isPointerTy(Ty)) DT.add(Ty, &MIRBuilder.getMF(), getSPIRVTypeID(SpirvType)); else if (isTypedPointerTy(Ty)) DT.add(cast(Ty)->getElementType(), @@ -1065,7 +1074,11 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType( const Type *Ty, MachineIRBuilder &MIRBuilder, SPIRV::AccessQualifier::AccessQualifier AccessQual, bool EmitIR) { Register Reg; - if (!isPointerTy(Ty)) { + if (auto *ExtTy = dyn_cast(Ty); + ExtTy && isTypedPointerWrapper(ExtTy)) { + Reg = DT.find(ExtTy->getTypeParameter(0), ExtTy->getIntParameter(0), + &MIRBuilder.getMF()); + } else if (!isPointerTy(Ty)) { Ty = adjustIntTypeByWidth(Ty); Reg = DT.find(Ty, &MIRBuilder.getMF()); } else if (isTypedPointerTy(Ty)) { diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index 3bb86e8be6950..ff4b0ea8757fa 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -330,7 +330,7 @@ class SPIRVGlobalRegistry { // In cases where the SPIR-V type is already known, this function can be // used to map it to the given VReg via an ASSIGN_TYPE instruction. void assignSPIRVTypeToVReg(SPIRVType *Type, Register VReg, - MachineFunction &MF); + const MachineFunction &MF); // Either generate a new OpTypeXXX instruction or return an existing one // corresponding to the given LLVM IR type. 
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp index 59a1bf50b771b..d5b81bf46c804 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp @@ -111,8 +111,8 @@ static void doInsertBitcast(const SPIRVSubtarget &STI, MachineRegisterInfo *MRI, SPIRVGlobalRegistry &GR, MachineInstr &I, Register OpReg, unsigned OpIdx, SPIRVType *NewPtrType) { - Register NewReg = MRI->createGenericVirtualRegister(LLT::scalar(64)); MachineIRBuilder MIB(I); + Register NewReg = createVirtualRegister(NewPtrType, &GR, MRI, MIB.getMF()); bool Res = MIB.buildInstr(SPIRV::OpBitcast) .addDef(NewReg) .addUse(GR.getSPIRVTypeID(NewPtrType)) @@ -121,8 +121,6 @@ static void doInsertBitcast(const SPIRVSubtarget &STI, MachineRegisterInfo *MRI, *STI.getRegBankInfo()); if (!Res) report_fatal_error("insert validation bitcast: cannot constrain all uses"); - MRI->setRegClass(NewReg, &SPIRV::iIDRegClass); - GR.assignSPIRVTypeToVReg(NewPtrType, NewReg, MIB.getMF()); I.getOperand(OpIdx).setReg(NewReg); } @@ -396,6 +394,7 @@ void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const { case SPIRV::OpGenericCastToPtr: validateAccessChain(STI, MRI, GR, MI); break; + case SPIRV::OpPtrAccessChain: case SPIRV::OpInBoundsPtrAccessChain: if (MI.getNumOperands() == 4) validateAccessChain(STI, MRI, GR, MI); diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp index ec1e13a90971b..90898b8bd7250 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp @@ -357,12 +357,13 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { verify(*ST.getInstrInfo()); } -static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpirvType, +static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpvType, LegalizerHelper &Helper, MachineRegisterInfo &MRI, SPIRVGlobalRegistry *GR) { 
Register ConvReg = MRI.createGenericVirtualRegister(ConvTy); - GR->assignSPIRVTypeToVReg(SpirvType, ConvReg, Helper.MIRBuilder.getMF()); + MRI.setRegClass(ConvReg, GR->getRegClass(SpvType)); + GR->assignSPIRVTypeToVReg(SpvType, ConvReg, Helper.MIRBuilder.getMF()); Helper.MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT) .addDef(ConvReg) .addUse(Reg); diff --git a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp index 11b9e4f6f6d17..3373d8e24dab4 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp @@ -102,10 +102,7 @@ static void processNewInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, if (!ResType) { // There was no "assign type" actions, let's fix this now ResType = ScalarType; - MRI.setRegClass(ResVReg, &SPIRV::iIDRegClass); - MRI.setType(ResVReg, - LLT::scalar(GR->getScalarOrVectorBitWidth(ResType))); - GR->assignSPIRVTypeToVReg(ResType, ResVReg, *GR->CurMF); + setRegClassType(ResVReg, ResType, GR, &MRI, *GR->CurMF, true); } } } else if (mayBeInserted(Opcode) && I.getNumDefs() == 1 && @@ -124,9 +121,7 @@ static void processNewInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR, if (!ResVType) continue; // Set type & class - MRI.setRegClass(ResVReg, GR->getRegClass(ResVType)); - MRI.setType(ResVReg, GR->getRegType(ResVType)); - GR->assignSPIRVTypeToVReg(ResVType, ResVReg, *GR->CurMF); + setRegClassType(ResVReg, ResVType, GR, &MRI, *GR->CurMF, true); } // If this is a simple operation that is to be reduced by TableGen // definition we must apply some of pre-legalizer rules here diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index f899b8b67affe..1ece3044aaa7b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -13,6 +13,7 @@ #include "SPIRVUtils.h" #include "MCTargetDesc/SPIRVBaseInfo.h" #include "SPIRV.h" +#include "SPIRVGlobalRegistry.h" #include "SPIRVInstrInfo.h" #include 
"SPIRVSubtarget.h" #include "llvm/ADT/StringRef.h" @@ -21,6 +22,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Demangle/Demangle.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsSPIRV.h" #include #include @@ -405,8 +407,10 @@ bool hasBuiltinTypePrefix(StringRef Name) { } bool isSpecialOpaqueType(const Type *Ty) { - if (const TargetExtType *EType = dyn_cast(Ty)) - return hasBuiltinTypePrefix(EType->getName()); + if (const TargetExtType *ExtTy = dyn_cast(Ty)) + return isTypedPointerWrapper(ExtTy) + ? false + : hasBuiltinTypePrefix(ExtTy->getName()); return false; } @@ -684,4 +688,77 @@ bool getVacantFunctionName(Module &M, std::string &Name) { return false; } +// Assign SPIR-V type to the register. If the register has no valid assigned +// class, set register LLT type and class according to the SPIR-V type. +void setRegClassType(Register Reg, SPIRVType *SpvType, SPIRVGlobalRegistry *GR, + MachineRegisterInfo *MRI, const MachineFunction &MF, + bool Force) { + GR->assignSPIRVTypeToVReg(SpvType, Reg, MF); + if (!MRI->getRegClassOrNull(Reg) || Force) { + MRI->setRegClass(Reg, GR->getRegClass(SpvType)); + MRI->setType(Reg, GR->getRegType(SpvType)); + } +} + +// Create a SPIR-V type, assign SPIR-V type to the register. If the register has +// no valid assigned class, set register LLT type and class according to the +// SPIR-V type. +void setRegClassType(Register Reg, const Type *Ty, SPIRVGlobalRegistry *GR, + MachineIRBuilder &MIRBuilder, bool Force) { + setRegClassType(Reg, GR->getOrCreateSPIRVType(Ty, MIRBuilder), GR, + MIRBuilder.getMRI(), MIRBuilder.getMF(), Force); +} + +// Create a virtual register and assign SPIR-V type to the register. Set +// register LLT type and class according to the SPIR-V type. 
+Register createVirtualRegister(SPIRVType *SpvType, SPIRVGlobalRegistry *GR, + MachineRegisterInfo *MRI, + const MachineFunction &MF) { + Register Reg = MRI->createVirtualRegister(GR->getRegClass(SpvType)); + MRI->setType(Reg, GR->getRegType(SpvType)); + GR->assignSPIRVTypeToVReg(SpvType, Reg, MF); + return Reg; +} + +// Create a virtual register and assign SPIR-V type to the register. Set +// register LLT type and class according to the SPIR-V type. +Register createVirtualRegister(SPIRVType *SpvType, SPIRVGlobalRegistry *GR, + MachineIRBuilder &MIRBuilder) { + return createVirtualRegister(SpvType, GR, MIRBuilder.getMRI(), + MIRBuilder.getMF()); +} + +// Create a SPIR-V type, virtual register and assign SPIR-V type to the +// register. Set register LLT type and class according to the SPIR-V type. +Register createVirtualRegister(const Type *Ty, SPIRVGlobalRegistry *GR, + MachineIRBuilder &MIRBuilder) { + return createVirtualRegister(GR->getOrCreateSPIRVType(Ty, MIRBuilder), GR, + MIRBuilder); +} + +// Return true if there is an opaque pointer type nested in the argument. 
+bool isNestedPointer(const Type *Ty) { + if (Ty->isPtrOrPtrVectorTy()) + return true; + if (const FunctionType *RefTy = dyn_cast(Ty)) { + if (isNestedPointer(RefTy->getReturnType())) + return true; + for (const Type *ArgTy : RefTy->params()) + if (isNestedPointer(ArgTy)) + return true; + return false; + } + if (const ArrayType *RefTy = dyn_cast(Ty)) + return isNestedPointer(RefTy->getElementType()); + return false; +} + +bool isSpvIntrinsic(const Value *Arg) { + if (const auto *II = dyn_cast(Arg)) + if (Function *F = II->getCalledFunction()) + if (F->getName().starts_with("llvm.spv.")) + return true; + return false; +} + } // namespace llvm diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index d218dbd850dc7..c0569549039d5 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -34,6 +34,7 @@ class Register; class StringRef; class SPIRVInstrInfo; class SPIRVSubtarget; +class SPIRVGlobalRegistry; // This class implements a partial ordering visitor, which visits a cyclic graph // in natural topological-like ordering. Topological ordering is not defined for @@ -198,6 +199,8 @@ uint64_t getIConstVal(Register ConstReg, const MachineRegisterInfo *MRI); // Check if MI is a SPIR-V specific intrinsic call. bool isSpvIntrinsic(const MachineInstr &MI, Intrinsic::ID IntrinsicID); +// Check if it's a SPIR-V specific intrinsic call. +bool isSpvIntrinsic(const Value *Arg); // Get type of i-th operand of the metadata node. Type *getMDOperandAsType(const MDNode *N, unsigned I); @@ -276,12 +279,19 @@ inline Type *getTypedPointerWrapper(Type *ElemTy, unsigned AS) { {ElemTy}, {AS}); } -inline bool isTypedPointerWrapper(TargetExtType *ExtTy) { +inline bool isTypedPointerWrapper(const TargetExtType *ExtTy) { return ExtTy->getName() == TYPED_PTR_TARGET_EXT_NAME && ExtTy->getNumIntParameters() == 1 && ExtTy->getNumTypeParameters() == 1; } +// True if this is an instance of PointerType or TypedPointerType. 
+inline bool isPointerTyOrWrapper(const Type *Ty) { + if (auto *ExtTy = dyn_cast(Ty)) + return isTypedPointerWrapper(ExtTy); + return isPointerTy(Ty); +} + inline Type *applyWrappers(Type *Ty) { if (auto *ExtTy = dyn_cast(Ty)) { if (isTypedPointerWrapper(ExtTy)) @@ -296,12 +306,14 @@ inline Type *applyWrappers(Type *Ty) { return Ty; } -inline Type *getPointeeType(Type *Ty) { - if (auto PType = dyn_cast(Ty)) - return PType->getElementType(); - else if (auto *ExtTy = dyn_cast(Ty)) - if (isTypedPointerWrapper(ExtTy)) - return applyWrappers(ExtTy->getTypeParameter(0)); +inline Type *getPointeeType(const Type *Ty) { + if (Ty) { + if (auto PType = dyn_cast(Ty)) + return PType->getElementType(); + else if (auto *ExtTy = dyn_cast(Ty)) + if (isTypedPointerWrapper(ExtTy)) + return ExtTy->getTypeParameter(0); + } return nullptr; } @@ -360,5 +372,23 @@ MachineInstr *getVRegDef(MachineRegisterInfo &MRI, Register Reg); #define SPIRV_BACKEND_SERVICE_FUN_NAME "__spirv_backend_service_fun" bool getVacantFunctionName(Module &M, std::string &Name); +void setRegClassType(Register Reg, const Type *Ty, SPIRVGlobalRegistry *GR, + MachineIRBuilder &MIRBuilder, bool Force = false); +void setRegClassType(Register Reg, const MachineInstr *SpvType, + SPIRVGlobalRegistry *GR, MachineRegisterInfo *MRI, + const MachineFunction &MF, bool Force = false); +Register createVirtualRegister(const MachineInstr *SpvType, + SPIRVGlobalRegistry *GR, + MachineRegisterInfo *MRI, + const MachineFunction &MF); +Register createVirtualRegister(const MachineInstr *SpvType, + SPIRVGlobalRegistry *GR, + MachineIRBuilder &MIRBuilder); +Register createVirtualRegister(const Type *Ty, SPIRVGlobalRegistry *GR, + MachineIRBuilder &MIRBuilder); + +// Return true if there is an opaque pointer type nested in the argument. 
+bool isNestedPointer(const Type *Ty); + } // namespace llvm #endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll index 3ebfa1d8c8a9d..6aeb29df9f7bd 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_const.ll @@ -1,7 +1,6 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s ; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; CHECK-DAG: OpCapability Int8 ; CHECK-DAG: OpCapability FunctionPointersINTEL ; CHECK-DAG: OpCapability Int64 ; CHECK: OpExtension "SPV_INTEL_function_pointers" @@ -9,19 +8,14 @@ ; CHECK-DAG: %[[TyVoid:.*]] = OpTypeVoid ; CHECK-DAG: %[[TyInt64:.*]] = OpTypeInt 64 0 ; CHECK-DAG: %[[TyFun:.*]] = OpTypeFunction %[[TyInt64]] %[[TyInt64]] -; CHECK-DAG: %[[TyInt8:.*]] = OpTypeInt 8 0 ; CHECK-DAG: %[[TyPtrFunCodeSection:.*]] = OpTypePointer CodeSectionINTEL %[[TyFun]] ; CHECK-DAG: %[[ConstFunFp:.*]] = OpConstantFunctionPointerINTEL %[[TyPtrFunCodeSection]] %[[DefFunFp:.*]] ; CHECK-DAG: %[[TyPtrFun:.*]] = OpTypePointer Function %[[TyFun]] ; CHECK-DAG: %[[TyPtrPtrFun:.*]] = OpTypePointer Function %[[TyPtrFun]] -; CHECK-DAG: %[[TyPtrInt8:.*]] = OpTypePointer Function %[[TyInt8]] -; CHECK-DAG: %[[TyPtrPtrInt8:.*]] = OpTypePointer Function %[[TyPtrInt8]] ; CHECK: OpFunction -; CHECK: %[[Var:.*]] = OpVariable %[[TyPtrPtrInt8]] Function -; CHECK: %[[SAddr:.*]] = OpBitcast %[[TyPtrPtrFun]] %[[Var]] -; CHECK: OpStore %[[SAddr]] %[[ConstFunFp]] -; CHECK: %[[LAddr:.*]] = OpBitcast %[[TyPtrPtrFun]] %[[Var]] -; CHECK: %[[FP:.*]] = OpLoad %[[TyPtrFun]] %[[LAddr]] +; CHECK: %[[Var:.*]] = OpVariable %[[TyPtrPtrFun]] Function +; CHECK: OpStore %[[Var]] %[[ConstFunFp]] +; CHECK: %[[FP:.*]] = 
OpLoad %[[TyPtrFun]] %[[Var]] ; CHECK: OpFunctionPointerCallINTEL %[[TyInt64]] %[[FP]] %[[#]] ; CHECK: OpFunctionEnd diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll index eb7b1dffaee50..9fa46f50a2e89 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_function_pointers/fp_two_calls.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_function_pointers %s -o - | FileCheck %s ; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: OpCapability Int8 @@ -12,13 +12,15 @@ ; CHECK-DAG: OpName %[[test:.*]] "test" ; CHECK-DAG: %[[TyVoid:.*]] = OpTypeVoid ; CHECK-DAG: %[[TyFloat32:.*]] = OpTypeFloat 32 -; CHECK-DAG: %[[TyInt8:.*]] = OpTypeInt 8 0 ; CHECK-DAG: %[[TyInt64:.*]] = OpTypeInt 64 0 +; CHECK-DAG: %[[TyInt8:.*]] = OpTypeInt 8 0 ; CHECK-DAG: %[[TyPtrInt8:.*]] = OpTypePointer Function %[[TyInt8]] -; CHECK-DAG: %[[TyFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrInt8]] -; CHECK-DAG: %[[TyPtrFp:.*]] = OpTypePointer Function %[[TyFp]] -; CHECK-DAG: %[[TyBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrFp]] %[[TyPtrInt8]] +; CHECK-DAG: %[[TyUncompleteFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrInt8]] +; CHECK-DAG: %[[TyPtrUncompleteFp:.*]] = OpTypePointer Function %[[TyUncompleteFp]] +; CHECK-DAG: %[[TyBar:.*]] = OpTypeFunction %[[TyInt64]] %[[TyPtrUncompleteFp]] %[[TyPtrInt8]] ; CHECK-DAG: %[[TyPtrBar:.*]] = OpTypePointer Function %[[TyBar]] +; CHECK-DAG: %[[TyFp:.*]] = OpTypeFunction %[[TyFloat32]] %[[TyPtrBar]] +; CHECK-DAG: %[[TyPtrFp:.*]] = OpTypePointer Function %[[TyFp]] ; CHECK-DAG: %[[TyTest:.*]] 
= OpTypeFunction %[[TyVoid]] %[[TyPtrFp]] %[[TyPtrInt8]] %[[TyPtrBar]] ; CHECK: %[[test]] = OpFunction %[[TyVoid]] None %[[TyTest]] ; CHECK: %[[fp]] = OpFunctionParameter %[[TyPtrFp]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll index 9374e154a0239..13667f44389e7 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroups/cl_intel_sub_groups.ll @@ -37,7 +37,8 @@ ; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_subgroups %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_subgroups %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_subgroups %s -o - -filetype=obj | spirv-val %} ; CHECK-ERROR: LLVM ERROR: intel_sub_group_shuffle: the builtin requires the following SPIR-V extension: SPV_INTEL_subgroups diff --git a/llvm/test/CodeGen/SPIRV/instructions/ret-type.ll b/llvm/test/CodeGen/SPIRV/instructions/ret-type.ll index bf71eb5628e21..82b115c77987f 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/ret-type.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/ret-type.ll @@ -13,16 +13,16 @@ ; CHECK-DAG: %[[Struct2:.*]] = OpTypeStruct %[[Struct1]] ; CHECK-DAG: %[[StructPtr:.*]] = OpTypePointer Function %[[Struct2]] ; CHECK-DAG: %[[Bool:.*]] = OpTypeBool -; CHECK-DAG: %[[FooType:.*]] = OpTypeFunction %[[StructPtr:.*]] %[[StructPtr]] %[[StructPtr]] %[[Bool]] +; CHECK-DAG: %[[FooType:.*]] = OpTypeFunction %[[StructPtr]] %[[StructPtr]] %[[StructPtr]] %[[Bool]] ; CHECK-DAG: %[[Char:.*]] = OpTypeInt 8 0 ; CHECK-DAG: %[[CharPtr:.*]] = OpTypePointer Function %[[Char]] ; CHECK: %[[Test1]] = 
OpFunction -; CHECK: OpFunctionCall %[[StructPtr:.*]] %[[Foo]] -; CHECK: OpFunctionCall %[[StructPtr:.*]] %[[Bar]] +; CHECK: OpFunctionCall %[[StructPtr]] %[[Foo]] +; CHECK: OpFunctionCall %[[CharPtr]] %[[Bar]] ; CHECK: OpFunctionEnd -; CHECK: %[[Foo]] = OpFunction %[[StructPtr:.*]] None %[[FooType]] +; CHECK: %[[Foo]] = OpFunction %[[StructPtr]] None %[[FooType]] ; CHECK: %[[Arg1:.*]] = OpFunctionParameter %[[StructPtr]] ; CHECK: %[[Arg2:.*]] = OpFunctionParameter ; CHECK: %[[Sw:.*]] = OpFunctionParameter @@ -30,17 +30,18 @@ ; CHECK: OpReturnValue %[[Res]] ; CHECK: OpReturnValue %[[Arg2]] -; CHECK: %[[Bar]] = OpFunction %[[StructPtr:.*]] None %[[#]] -; CHECK: %[[BarArg:.*]] = OpFunctionParameter -; CHECK: %[[BarRes:.*]] = OpInBoundsPtrAccessChain %[[CharPtr]] %[[BarArg]] %[[#]] -; CHECK: %[[BarResCasted:.*]] = OpBitcast %[[StructPtr]] %[[BarRes]] +; CHECK: %[[Bar]] = OpFunction %[[CharPtr]] None %[[#]] +; CHECK: %[[BarArg:.*]] = OpFunctionParameter %[[StructPtr]] +; CHECK: %[[BarArgCasted:.*]] = OpBitcast %[[CharPtr]] %[[BarArg]] +; CHECK: %[[BarRes:.*]] = OpInBoundsPtrAccessChain %[[CharPtr]] %[[BarArgCasted]] %[[#]] ; CHECK: %[[BarResStruct:.*]] = OpInBoundsPtrAccessChain %[[StructPtr]] %[[#]] %[[#]] -; CHECK: OpReturnValue %[[BarResStruct]] -; CHECK: OpReturnValue %[[BarResCasted]] +; CHECK: %[[BarResStructCasted:.*]] = OpBitcast %[[CharPtr]] %[[BarResStruct]] +; CHECK: OpReturnValue %[[BarResStructCasted]] +; CHECK: OpReturnValue %[[BarRes]] ; CHECK: %[[Test2]] = OpFunction -; CHECK: OpFunctionCall %[[StructPtr:.*]] %[[Foo]] -; CHECK: OpFunctionCall %[[StructPtr:.*]] %[[Bar]] +; CHECK: OpFunctionCall %[[StructPtr]] %[[Foo]] +; CHECK: OpFunctionCall %[[CharPtr]] %[[Bar]] ; CHECK: OpFunctionEnd %struct = type { %array } diff --git a/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll b/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll new file mode 100644 index 0000000000000..a846e1936d7ac --- /dev/null +++ 
b/llvm/test/CodeGen/SPIRV/pointers/builtin-ret-reg-type.ll @@ -0,0 +1,50 @@ +; The goal of the test case is to ensure that correct types are applied to virtual registers which were +; used as return values in call lowering. Pass criterion is that spirv-val considers output valid. + +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +%t_half = type { half } +%t_i17 = type { [17 x i32] } +%t_h17 = type { [17 x %t_half] } + +define internal spir_func void @foo(i64 %arrayinit.cur.add_4, half %r1, ptr addrspace(4) noundef align 8 dereferenceable_or_null(72) %this) { +entry: + %r_3 = alloca %t_h17, align 8 + %p_src = alloca %t_i17, align 4 + %p_src4 = addrspacecast ptr %p_src to ptr addrspace(4) + %call_2 = call spir_func noundef ptr @_Z42__spirv_GenericCastToPtrExplicit_ToPrivatePvi(ptr addrspace(4) noundef %p_src4, i32 noundef 7) + br label %l_body + +l_body: ; preds = %l_body, %entry + %l_done = icmp eq i64 %arrayinit.cur.add_4, 34 + br i1 %l_done, label %exit, label %l_body + +exit: ; preds = %l_body + %0 = addrspacecast ptr %call_2 to ptr addrspace(4) + %call_6 = call spir_func noundef ptr @_Z42__spirv_GenericCastToPtrExplicit_ToPrivatePvi(ptr addrspace(4) noundef %0, i32 noundef 7) + br label %for.cond_3 + +for.cond_3: ; preds = %for.body_3, %exit + %lsr.iv1 = phi ptr [ %scevgep2, %for.body_3 ], [ %call_6, %exit ] + %lsr.iv = phi ptr [ %scevgep, %for.body_3 ], [ %r_3, %exit ] + %i.0_3 = phi i64 [ 0, %exit ], [ %inc_3, %for.body_3 ] + %cmp_3 = icmp ult i64 %i.0_3, 17 + br i1 %cmp_3, label %for.body_3, label %exit2 + +for.body_3: ; preds = %for.cond_3 + %call2_5 = call spir_func noundef half @_Z17__spirv_ocl_frexpDF16_PU3AS0i(half noundef %r1, ptr noundef %lsr.iv1) + store half %call2_5, ptr %lsr.iv, align 2 + %inc_3 = add nuw nsw i64 %i.0_3, 1 + %scevgep = getelementptr i8, ptr %lsr.iv, i64 2 + %scevgep2 = 
getelementptr i8, ptr %lsr.iv1, i64 4 + br label %for.cond_3 + +exit2: ; preds = %for.cond_3 + ret void +} + +declare dso_local spir_func noundef ptr @_Z42__spirv_GenericCastToPtrExplicit_ToPrivatePvi(ptr addrspace(4) noundef, i32 noundef) +declare dso_local spir_func noundef half @_Z17__spirv_ocl_frexpDF16_PU3AS0i(half noundef, ptr noundef) +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) +declare void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) diff --git a/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll b/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll new file mode 100644 index 0000000000000..0e2730e18bf38 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/gep-types-1.ll @@ -0,0 +1,42 @@ +; The goal of the test is to ensure that type inference doesn't break validity of the generated SPIR-V code. +; The only pass criterion is that spirv-val considers output valid. 
+ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#Int:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#Char:]] = OpTypeInt 8 0 +; CHECK-DAG: %[[#PtrChar:]] = OpTypePointer Function %[[#Char]] +; CHECK-DAG: %[[#PtrCharCW:]] = OpTypePointer CrossWorkgroup %[[#Char]] +; CHECK-DAG: %[[#PtrCharGen:]] = OpTypePointer Generic %[[#Char]] +; CHECK-DAG: %[[#Struct:]] = OpTypeStruct %[[#]] %[[#]] %[[#]] +; CHECK-DAG: %[[#PtrInt:]] = OpTypePointer Function %[[#Int]] +; CHECK-DAG: %[[#PtrPtrCharGen:]] = OpTypePointer Function %[[#PtrCharGen]] +; CHECK-DAG: %[[#PtrStruct:]] = OpTypePointer Function %[[#Struct]] +; CHECK: OpFunction +; CHECK: %[[#Arg1:]] = OpFunctionParameter %[[#Int]] +; CHECK: %[[#Arg2:]] = OpFunctionParameter %[[#PtrCharCW]] +; CHECK: %[[#Kernel:]] = OpVariable %[[#PtrStruct]] Function +; CHECK: %[[#IntKernel:]] = OpBitcast %[[#PtrInt]] %[[#Kernel]] +; CHECK: OpStore %[[#IntKernel]] %[[#Arg1]] +; CHECK: %[[#CharKernel:]] = OpBitcast %[[#PtrChar]] %[[#Kernel]] +; CHECK: %[[#P:]] = OpInBoundsPtrAccessChain %[[#PtrChar]] %[[#CharKernel]] %[[#]] +; CHECK: %[[#R0:]] = OpPtrCastToGeneric %[[#PtrCharGen]] %[[#Arg2]] +; CHECK: %[[#P2:]] = OpBitcast %[[#PtrPtrCharGen]] %[[#P]] +; CHECK: OpStore %[[#P2]] %[[#R0]] +; CHECK: %[[#P3:]] = OpBitcast %[[#PtrPtrCharGen]] %[[#P]] +; CHECK: %[[#]] = OpLoad %[[#PtrCharGen]] %[[#P3]] + +%"class.std::complex" = type { { double, double } } +%class.anon = type { i32, ptr addrspace(4), [2 x [2 x %"class.std::complex"]] } + +define weak_odr dso_local spir_kernel void @foo(i32 noundef %_arg_N, ptr addrspace(1) noundef align 8 %_arg_p) { +entry: + %Kernel = alloca %class.anon, align 8 + store i32 %_arg_N, ptr %Kernel, align 8 + %p = getelementptr inbounds i8, ptr %Kernel, i64 8 + %r0 = addrspacecast ptr addrspace(1) %_arg_p to ptr addrspace(4) + store ptr addrspace(4) %r0, ptr %p, 
align 8 + %r3 = load ptr addrspace(4), ptr %p, align 8 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/pointers/gep-types-2.ll b/llvm/test/CodeGen/SPIRV/pointers/gep-types-2.ll new file mode 100644 index 0000000000000..d94da31890ab1 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/gep-types-2.ll @@ -0,0 +1,36 @@ +; The goal of the test is to ensure that type inference doesn't break validity of the generated SPIR-V code. +; The only pass criterion is that spirv-val considers output valid. + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpFunction + +%class.anon = type { i32, ptr addrspace(4)} + +define weak_odr dso_local spir_kernel void @foo(i32 noundef %_arg_N, i1 %fl) { +entry: + %__SYCLKernel = alloca %class.anon, align 8 + store i32 %_arg_N, ptr %__SYCLKernel, align 8 + br label %arinit + +arinit: + %scevgep3 = getelementptr nuw i8, ptr %__SYCLKernel, i64 24 + br label %for.cond.i + +for.cond.i: + %lsr.iv4 = phi ptr [ %scevgep5, %for.body.i ], [ %scevgep3, %arinit ] + br i1 %fl, label %for.body.i, label %exit + +for.body.i: + %scevgep6 = getelementptr i8, ptr %lsr.iv4, i64 -8 + %_M_value.imag.i.i = load double, ptr %lsr.iv4, align 8 + %scevgep5 = getelementptr i8, ptr %lsr.iv4, i64 32 + br label %for.cond.i + +exit: + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll new file mode 100644 index 0000000000000..a9e79df259c4f --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/phi-chain-types.ll @@ -0,0 +1,82 @@ +; The goal of the test case is to ensure that correct types are applied to PHI's as arguments of other PHI's. 
+; Pass criterion is that spirv-val considers output valid. + +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s + +; CHECK-DAG: OpName %[[#Foo:]] "foo" +; CHECK-DAG: OpName %[[#FooVal1:]] "val1" +; CHECK-DAG: OpName %[[#FooVal2:]] "val2" +; CHECK-DAG: OpName %[[#FooVal3:]] "val3" +; CHECK-DAG: OpName %[[#Bar:]] "bar" +; CHECK-DAG: OpName %[[#BarVal1:]] "val1" +; CHECK-DAG: OpName %[[#BarVal2:]] "val2" +; CHECK-DAG: OpName %[[#BarVal3:]] "val3" + +; CHECK-DAG: %[[#Short:]] = OpTypeInt 16 0 +; CHECK-DAG: %[[#ShortGenPtr:]] = OpTypePointer Generic %[[#Short]] +; CHECK-DAG: %[[#ShortWrkPtr:]] = OpTypePointer Workgroup %[[#Short]] +; CHECK-DAG: %[[#G1:]] = OpVariable %[[#ShortWrkPtr]] Workgroup + +; CHECK: %[[#Foo:]] = OpFunction %[[#]] None %[[#]] +; CHECK: %[[#FooArgP:]] = OpFunctionParameter %[[#ShortGenPtr]] +; CHECK: OpFunctionParameter +; CHECK: OpFunctionParameter +; CHECK: OpFunctionParameter +; CHECK: %[[#FooG1:]] = OpPtrCastToGeneric %[[#ShortGenPtr]] %[[#G1]] +; CHECK: %[[#FooVal2]] = OpPhi %[[#ShortGenPtr]] %[[#FooArgP]] %[[#]] %[[#FooVal3]] %[[#]] +; CHECK: %[[#FooVal1]] = OpPhi %[[#ShortGenPtr]] %[[#FooG1]] %[[#]] %[[#FooVal2]] %[[#]] +; CHECK: %[[#FooVal3]] = OpLoad %[[#ShortGenPtr]] %[[#]] + +; CHECK: %[[#Bar:]] = OpFunction %[[#]] None %[[#]] +; CHECK: %[[#BarArgP:]] = OpFunctionParameter %[[#ShortGenPtr]] +; CHECK: OpFunctionParameter +; CHECK: OpFunctionParameter +; CHECK: OpFunctionParameter +; CHECK: %[[#BarVal3]] = OpLoad %[[#ShortGenPtr]] %[[#]] +; CHECK: %[[#BarG1:]] = OpPtrCastToGeneric %[[#ShortGenPtr]] %[[#G1]] +; CHECK: %[[#BarVal1]] = OpPhi %[[#ShortGenPtr]] %[[#BarG1]] %[[#]] %[[#BarVal2]] %[[#]] +; CHECK: %[[#BarVal2]] = OpPhi %[[#ShortGenPtr]] %[[#BarArgP]] %[[#]] %[[#BarVal3]] %[[#]] + +@G1 = internal addrspace(3) global i16 undef, align 8 +@G2 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, 
align 8 + +define spir_kernel void @foo(ptr addrspace(4) %p, i1 %f1, i1 %f2, i1 %f3) { +entry: + br label %l1 + +l1: + br i1 %f1, label %l2, label %exit + +l2: + %val2 = phi ptr addrspace(4) [ %p, %l1 ], [ %val3, %l3 ] + %val1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @G1 to ptr addrspace(4)), %l1 ], [ %val2, %l3 ] + br i1 %f2, label %l3, label %exit + +l3: + %val3 = load ptr addrspace(4), ptr addrspace(3) @G2, align 8 + br i1 %f3, label %l2, label %exit + +exit: + ret void +} + +define spir_kernel void @bar(ptr addrspace(4) %p, i1 %f1, i1 %f2, i1 %f3) { +entry: + %val3 = load ptr addrspace(4), ptr addrspace(3) @G2, align 8 + br label %l1 + +l3: + br i1 %f3, label %l2, label %exit + +l1: + br i1 %f1, label %l2, label %exit + +l2: + %val1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @G1 to ptr addrspace(4)), %l1 ], [ %val2, %l3 ] + %val2 = phi ptr addrspace(4) [ %p, %l1 ], [ %val3, %l3 ] + br i1 %f2, label %l3, label %exit + +exit: + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll index 07824d4ed6cd8..f4c8c5a79bcb7 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll @@ -1,15 +1,14 @@ ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; CHECK: %[[#Char:]] = OpTypeInt 8 0 -; CHECK: %[[#PtrChar:]] = OpTypePointer Function %[[#Char]] -; CHECK: %[[#Int:]] = OpTypeInt 32 0 -; CHECK: %[[#PtrInt:]] = OpTypePointer Function %[[#Int]] +; CHECK-DAG: %[[#Char:]] = OpTypeInt 8 0 +; CHECK-DAG: %[[#PtrChar:]] = OpTypePointer Function %[[#Char]] +; CHECK-DAG: %[[#Int:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#PtrInt:]] = OpTypePointer Function %[[#Int]] ; CHECK: %[[#R1:]] = OpFunctionCall %[[#PtrChar]] %[[#]] ; CHECK: %[[#R2:]] = 
OpFunctionCall %[[#PtrInt]] %[[#]] ; CHECK: %[[#Casted:]] = OpBitcast %[[#PtrChar]] %[[#R2]] ; CHECK: OpPhi %[[#PtrChar]] %[[#R1]] %[[#]] %[[#Casted]] %[[#]] -; CHECK: OpPhi %[[#PtrChar]] %[[#R1]] %[[#]] %[[#Casted]] %[[#]] define ptr @foo(i1 %arg) { entry: diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-via-store-load-args-rev.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-via-store-load-args-rev.ll new file mode 100644 index 0000000000000..b0047ba82c36d --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-via-store-load-args-rev.ll @@ -0,0 +1,64 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - --translator-compatibility-mode | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpName %[[#Bar:]] "bar" +; CHECK-DAG: OpName %[[#Foo:]] "foo" +; CHECK-DAG: OpName %[[#Test:]] "test" +; CHECK-DAG: %[[#Void:]] = OpTypeVoid +; CHECK-DAG: %[[#Long:]] = OpTypeInt 64 0 +; CHECK-DAG: %[[#LongArr:]] = OpTypeArray %[[#Long]] %[[#]] +; CHECK-DAG: %[[#StructLongArr:]] = OpTypeStruct %[[#LongArr]] +; CHECK-DAG: %[[#Struct:]] = OpTypeStruct %[[#StructLongArr]] +; CHECK-DAG: %[[#StructGenPtr:]] = OpTypePointer Generic %[[#Struct]] +; CHECK-DAG: %[[#StructFunPtr:]] = OpTypePointer Function %[[#Struct]] +; CHECK-DAG: %[[#StructGenGenPtr:]] = OpTypePointer Generic %[[#StructGenPtr]] +; CHECK-DAG: %[[#StructFunGenPtr:]] = OpTypePointer Function %[[#StructGenPtr]] + +; CHECK: %[[#Bar]] = OpFunction +; CHECK: %[[#BarVar:]] = OpVariable %[[#StructFunPtr]] Function +; CHECK: %[[#BarVarToGen:]] = OpPtrCastToGeneric %[[#StructGenPtr]] %[[#BarVar]] +; CHECK: %[[#]] = OpFunctionCall %[[#Void]] %[[#Foo]] %[[#BarVarToGen]] + +; CHECK: %[[#Foo]] = OpFunction +; 
CHECK: %[[#FooArg1:]] = OpFunctionParameter %[[#StructGenPtr]] +; CHECK: %[[#FooVar:]] = OpVariable %[[#StructFunGenPtr]] Function +; CHECK: %[[#FooVarToGen:]] = OpPtrCastToGeneric %[[#StructGenGenPtr]] %[[#FooVar]] +; CHECK: OpStore %[[#FooVarToGen]] %[[#FooArg1]] +; CHECK: %[[#FooLoad:]] = OpLoad %[[#StructGenPtr]] %[[#FooVarToGen]] +; CHECK: %[[#]] = OpFunctionCall %[[#Void:]] %[[#Test]] %[[#FooLoad:]] + +; CHECK: %[[#Test]] = OpFunction +; CHECK: %[[#TestArg1:]] = OpFunctionParameter %[[#StructGenPtr]] +; CHECK: %[[#TestVar:]] = OpVariable %[[#StructFunGenPtr]] Function +; CHECK: %[[#TestVarToGen:]] = OpPtrCastToGeneric %[[#StructGenGenPtr]] %[[#TestVar]] +; CHECK: OpStore %[[#TestVarToGen]] %[[#TestArg1]] + +%t_range = type { %t_arr } +%t_arr = type { [1 x i64] } + +define internal spir_func void @bar() { + %GlobalOffset = alloca %t_range, align 8 + %GlobalOffset.ascast = addrspacecast ptr %GlobalOffset to ptr addrspace(4) + call spir_func void @foo(ptr addrspace(4) noundef align 8 dereferenceable(8) %GlobalOffset.ascast) + ret void +} + +define internal spir_func void @foo(ptr addrspace(4) noundef align 8 dereferenceable(8) %Offset) { +entry: + %Offset.addr = alloca ptr addrspace(4), align 8 + %Offset.addr.ascast = addrspacecast ptr %Offset.addr to ptr addrspace(4) + store ptr addrspace(4) %Offset, ptr addrspace(4) %Offset.addr.ascast, align 8 + %r2 = load ptr addrspace(4), ptr addrspace(4) %Offset.addr.ascast, align 8 + call spir_func void @test(ptr addrspace(4) noundef align 8 dereferenceable(8) %r2) + ret void +} + +define void @test(ptr addrspace(4) noundef align 8 dereferenceable(8) %offset) { + %offset.addr = alloca ptr addrspace(4), align 8 + %offset.addr.ascast = addrspacecast ptr %offset.addr to ptr addrspace(4) + store ptr addrspace(4) %offset, ptr addrspace(4) %offset.addr.ascast, align 8 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll b/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll index 
54b2c78674776..2cba0f6ebd74b 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/OpGenericCastToPtr.ll @@ -2,9 +2,7 @@ ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-SPIRV-DAG: %[[#Char:]] = OpTypeInt 8 0 -; CHECK-SPIRV-DAG: %[[#GlobalCharPtr:]] = OpTypePointer CrossWorkgroup %[[#Char]] ; CHECK-SPIRV-DAG: %[[#LocalCharPtr:]] = OpTypePointer Workgroup %[[#Char]] -; CHECK-SPIRV-DAG: %[[#PrivateCharPtr:]] = OpTypePointer Function %[[#Char]] ; CHECK-SPIRV-DAG: %[[#GenericCharPtr:]] = OpTypePointer Generic %[[#Char]] ; CHECK-SPIRV-DAG: %[[#Int:]] = OpTypeInt 32 0 diff --git a/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll b/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll index fcb61911e0d29..e512f909cfd05 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/spirv-event-null.ll @@ -64,6 +64,19 @@ declare dso_local spir_func target("spirv.Event") @_Z22__spirv_GroupAsyncCopyjPU ; CHECK: OpGroupWaitEvents %[[#]] %[[#]] %[[#EventVarBarGen]] ; CHECK: OpFunctionEnd +; CHECK2: OpFunction +; CHECK2: %[[#BarArg1:]] = OpFunctionParameter %[[#TyPtrSV4_W]] +; CHECK2: %[[#BarArg2:]] = OpFunctionParameter %[[#TyPtrSV4_CW]] +; CHECK2: %[[#EventVarBar:]] = OpVariable %[[#TyEventPtr]] Function +; CHECK2: %[[#SrcBar:]] = OpInBoundsPtrAccessChain %[[#TyPtrSV4_CW]] %[[#BarArg2]] %[[#]] +; CHECK2-DAG: %[[#BarArg1Casted:]] = OpBitcast %[[#TyPtrV4_W]] %[[#BarArg1]] +; CHECK2-DAG: %[[#SrcBarCasted:]] = OpBitcast %[[#TyPtrV4_CW]] %[[#SrcBar]] +; CHECK2: %[[#ResBar:]] = OpGroupAsyncCopy %[[#TyEvent]] %[[#]] %[[#BarArg1Casted]] %[[#SrcBarCasted]] %[[#]] %[[#]] %[[#ConstEvent]] +; CHECK2: OpStore %[[#EventVarBar]] %[[#ResBar]] +; CHECK2: %[[#EventVarBarGen:]] = OpPtrCastToGeneric %[[#TyEventPtrGen]] %[[#EventVarBar]] +; CHECK2: OpGroupWaitEvents %[[#]] %[[#]] %[[#EventVarBarGen]] +; CHECK2: 
OpFunctionEnd + %Vec4 = type { <4 x i8> } define spir_kernel void @bar(ptr addrspace(3) %_arg_Local, ptr addrspace(1) readonly %_arg) { diff --git a/llvm/test/CodeGen/SPIRV/validate/sycl-hier-par-basic.ll b/llvm/test/CodeGen/SPIRV/validate/sycl-hier-par-basic.ll new file mode 100644 index 0000000000000..77ed1d6fecf9a --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/validate/sycl-hier-par-basic.ll @@ -0,0 +1,974 @@ +; This is an excerpt from the SYCL end-to-end test suite, cleaned of irrelevant details, +; that reproduced multiple cases of issues where an OpPhi's result type mismatches its operand types. +; The only pass criterion is that spirv-val considers output valid. + +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +%struct.PFWGFunctor = type { i64, i64, i32, i32, %"class.sycl::_V1::accessor" } +%"class.sycl::_V1::accessor" = type { %"class.sycl::_V1::detail::AccessorImplDevice", %union.anon } +%"class.sycl::_V1::detail::AccessorImplDevice" = type { %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::range" } +%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::detail::array" = type { [1 x i64] } +%union.anon = type { ptr addrspace(1) } +%class.anon.2 = type { %"class.sycl::_V1::accessor" } +%"class.sycl::_V1::group" = type { %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::range" } +%"class.sycl::_V1::group.15" = type { %"class.sycl::_V1::range.16", %"class.sycl::_V1::range.16", %"class.sycl::_V1::range.16", %"class.sycl::_V1::range.16" } +%"class.sycl::_V1::range.16" = type { %"class.sycl::_V1::detail::array.17" } +%"class.sycl::_V1::detail::array.17" = type { [2 x i64] } +%"class.sycl::_V1::private_memory" = type { %struct.MyStruct } +%struct.MyStruct = type { i32, i32 } + +@GFunctor = internal addrspace(3) global %struct.PFWGFunctor undef, align 8 +@WI.0 = internal unnamed_addr 
addrspace(3) global i64 undef, align 8 +@WI.1 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@WI.2 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@WI.3 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@WI.4 = internal unnamed_addr addrspace(3) global i32 undef, align 8 +@WI.6 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@GCnt = internal unnamed_addr addrspace(3) global i32 undef, align 4 +@__spirv_BuiltInNumWorkgroups = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@GKernel1 = internal addrspace(3) global %class.anon.2 undef, align 8 +@GCnt2 = internal unnamed_addr addrspace(3) global i32 undef, align 4 +@GKernel2 = internal addrspace(3) global %class.anon.2 undef, align 8 +@GCnt3 = internal unnamed_addr addrspace(3) global i32 undef, align 4 +@GKernel3 = internal addrspace(3) global %class.anon.2 undef, align 8 +@GCnt4 = internal unnamed_addr addrspace(3) global i32 undef, align 4 +@GKernel4 = internal addrspace(3) global %class.anon.2 undef, align 8 +@GCnt5 = internal unnamed_addr addrspace(3) global i32 undef, align 4 +@__spirv_BuiltInLocalInvocationIndex = external local_unnamed_addr addrspace(1) constant i64, align 8 +@GThis = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@GAsCast = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@GCmp = internal unnamed_addr addrspace(3) global i1 undef, align 1 +@WGCopy = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@WGCopy.1.0 = internal unnamed_addr addrspace(3) global i64 undef, align 16 +@WGCopy.1.1 = internal unnamed_addr addrspace(3) global i64 undef, align 16 +@WGCopy.1.2 = internal unnamed_addr addrspace(3) global i64 undef, align 16 +@WGCopy.1.3 = internal unnamed_addr addrspace(3) global i64 undef, align 16 +@WGCopy.1.4 = internal unnamed_addr addrspace(3) global i32 undef, align 16 +@WGCopy.1.5 = internal 
unnamed_addr addrspace(3) global i32 undef, align 16 +@WGCopy.1.6 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 16 +@ArgShadow = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group" undef, align 16 +@GAsCast2 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@GCmp2 = internal unnamed_addr addrspace(3) global i1 undef, align 1 +@WGCopy.3.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@WGCopy.4.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@WGCopy.5.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@WGCopy.6.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@ArgShadow.7 = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group" undef, align 16 +@GAscast3 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@GCmp3 = internal unnamed_addr addrspace(3) global i1 undef, align 1 +@WGCopy.9.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@WGCopy.10.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@ArgShadow.11 = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group" undef, align 16 +@GAsCast4 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@GCmp4 = internal unnamed_addr addrspace(3) global i1 undef, align 1 +@WGCopy.13.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@WGCopy.13.1 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@WGCopy.14.0 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@WGCopy.14.1 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@WGCopy.15.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@WGCopy.15.1 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@WGCopy.16.0 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@WGCopy.16.1 = internal unnamed_addr addrspace(3) 
global ptr addrspace(4) undef, align 8 +@ArgShadow.17 = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group.15" undef, align 16 +@GAsCast5 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@GCmp5 = internal unnamed_addr addrspace(3) global i1 undef, align 1 +@WGCopy.19.0 = internal unnamed_addr addrspace(3) global i64 undef, align 8 +@WGCopy.20.0 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@WGCopy.20.1 = internal unnamed_addr addrspace(3) global ptr addrspace(4) undef, align 8 +@ArgShadow.21 = internal unnamed_addr addrspace(3) global %"class.sycl::_V1::group" undef, align 16 +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalSize = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInWorkgroupId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInWorkgroupSize = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent mustprogress norecurse nounwind +define weak_odr dso_local spir_kernel void @_ZTS11PFWGFunctor(i64 noundef %_arg_wg_chunk, i64 noundef %_arg_range_length, i32 noundef %_arg_n_iter, i32 noundef %_arg_addend, ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) { +entry: + %agg.tmp67 = alloca %"class.sycl::_V1::group", align 8 + store i64 %_arg_wg_chunk, ptr addrspace(3) @GFunctor, align 8 + store i64 %_arg_range_length, ptr addrspace(3) undef, align 8 + store i32 %_arg_n_iter, ptr addrspace(3) undef, align 8 + store i32 %_arg_addend, 
ptr addrspace(3) undef, align 4 + %0 = load i64, ptr %_arg_dev_ptr1, align 8 + %1 = load i64, ptr %_arg_dev_ptr2, align 8 + %2 = load i64, ptr %_arg_dev_ptr3, align 8 + store i64 %2, ptr addrspace(3) undef, align 8 + store i64 %0, ptr addrspace(3) undef, align 8 + store i64 %1, ptr addrspace(3) undef, align 8 + %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2 + store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8 + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32 + %4 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32 + %5 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32 + %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32 + call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %agg.tmp67) + store i64 %3, ptr %agg.tmp67, align 1 + %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8 + store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1 + %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16 + store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1 + %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24 + store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1 + %7 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8 + tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %cmpz15.i = icmp eq i64 %7, 0 + br i1 %cmpz15.i, label %leader.i, label %merge.i + +leader.i: ; preds = %entry + call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow, ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, i64 32, i1 false) + br label %merge.i + +merge.i: ; preds = %leader.i, %entry + tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 
dereferenceable(32) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow, i64 32, i1 false) + tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz15.i, label %wg_leader.i, label %wg_cf.i + +wg_leader.i: ; preds = %merge.i + %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4) + store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAsCast, align 8 + store ptr addrspace(4) addrspacecast (ptr addrspace(3) @GFunctor to ptr addrspace(4)), ptr addrspace(3) @GThis, align 8 + %8 = load i32, ptr addrspace(3) undef, align 4 + %9 = load i64, ptr addrspace(3) @GFunctor, align 8 + %index.i = getelementptr inbounds i8, ptr %agg.tmp67, i64 24 + %10 = load i64, ptr %index.i, align 8 + %mul.i = mul i64 %9, %10 + %localRange.i = getelementptr inbounds i8, ptr %agg.tmp67, i64 8 + %11 = load i64, ptr %localRange.i, align 8 + %12 = load i64, ptr addrspace(3) undef, align 8 + store i64 %9, ptr addrspace(3) @WI.0, align 8 + store i64 %11, ptr addrspace(3) @WI.1, align 8 + store i64 %mul.i, ptr addrspace(3) @WI.2, align 8 + store i64 %12, ptr addrspace(3) @WI.3, align 8 + store i32 %8, ptr addrspace(3) @WI.4, align 8 + store ptr addrspace(4) undef, ptr addrspace(3) @WI.6, align 8 + store i32 0, ptr addrspace(3) @GCnt, align 4 + br label %wg_cf.i + +wg_cf.i: ; preds = %wg_leader.i, %merge.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %wg_val_this1.i = load ptr addrspace(4), ptr addrspace(3) @GThis, align 8 + %n_iter.i = getelementptr inbounds i8, ptr addrspace(4) %wg_val_this1.i, i64 16 + %13 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 + br label %for.cond.i + +for.cond.i: ; preds = %wg_cf11.i, %wg_cf.i + %agg.tmp.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.0.0.copyload13, %wg_cf11.i ] + %agg.tmp.i.sroa.6.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.6.0.copyload15, %wg_cf11.i ] + %agg.tmp.i.sroa.7.0 = phi i64 [ undef, %wg_cf.i ], [ 
%agg.tmp.i.sroa.7.0.copyload17, %wg_cf11.i ] + %agg.tmp.i.sroa.8.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.8.0.copyload19, %wg_cf11.i ] + %agg.tmp.i.sroa.9.0 = phi i32 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.9.0.copyload21, %wg_cf11.i ] + %agg.tmp.i.sroa.10.0 = phi i32 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.10.0.copyload23, %wg_cf11.i ] + %agg.tmp.i.sroa.11.0 = phi ptr addrspace(4) [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.11.0.copyload25, %wg_cf11.i ] + %this.addr.0.i = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @GFunctor to ptr addrspace(4)), %wg_cf.i ], [ %mat_ld13.i, %wg_cf11.i ] + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz15.i, label %wg_leader4.i, label %wg_cf5.i + +wg_leader4.i: ; preds = %for.cond.i + %14 = load i32, ptr addrspace(3) @GCnt, align 4 + %15 = load i32, ptr addrspace(4) %n_iter.i, align 8 + %cmp.i = icmp slt i32 %14, %15 + store i1 %cmp.i, ptr addrspace(3) @GCmp, align 1 + br label %wg_cf5.i + +wg_cf5.i: ; preds = %wg_leader4.i, %for.cond.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp, align 1 + br i1 %wg_val_cmp.i, label %for.body.i, label %_ZNK11PFWGFunctorclEN4sycl3_V15groupILi1EEE.exit + +for.body.i: ; preds = %wg_cf5.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz15.i, label %wg_leader7.i, label %wg_cf8.i + +wg_leader7.i: ; preds = %for.body.i + %agg.tmp.i.sroa.0.0.copyload = load i64, ptr addrspace(3) @WI.0, align 8 + %agg.tmp.i.sroa.6.0.copyload = load i64, ptr addrspace(3) @WI.1, align 8 + %agg.tmp.i.sroa.7.0.copyload = load i64, ptr addrspace(3) @WI.2, align 8 + %agg.tmp.i.sroa.8.0.copyload = load i64, ptr addrspace(3) @WI.3, align 8 + %agg.tmp.i.sroa.9.0.copyload = load i32, ptr addrspace(3) @WI.4, align 8 + %agg.tmp.i.sroa.11.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WI.6, align 8 + br label %wg_cf8.i + +wg_cf8.i: ; preds = %wg_leader7.i, 
%for.body.i + %agg.tmp.i.sroa.0.1 = phi i64 [ %agg.tmp.i.sroa.0.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.0.0, %for.body.i ] + %agg.tmp.i.sroa.6.1 = phi i64 [ %agg.tmp.i.sroa.6.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.6.0, %for.body.i ] + %agg.tmp.i.sroa.7.1 = phi i64 [ %agg.tmp.i.sroa.7.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.7.0, %for.body.i ] + %agg.tmp.i.sroa.8.1 = phi i64 [ %agg.tmp.i.sroa.8.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.8.0, %for.body.i ] + %agg.tmp.i.sroa.9.1 = phi i32 [ %agg.tmp.i.sroa.9.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.9.0, %for.body.i ] + %agg.tmp.i.sroa.11.1 = phi ptr addrspace(4) [ %agg.tmp.i.sroa.11.0.copyload, %wg_leader7.i ], [ %agg.tmp.i.sroa.11.0, %for.body.i ] + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz15.i, label %TestMat.i, label %LeaderMat.i + +TestMat.i: ; preds = %wg_cf8.i + store i64 %agg.tmp.i.sroa.0.1, ptr addrspace(3) @WGCopy.1.0, align 16 + store i64 %agg.tmp.i.sroa.6.1, ptr addrspace(3) @WGCopy.1.1, align 16 + store i64 %agg.tmp.i.sroa.7.1, ptr addrspace(3) @WGCopy.1.2, align 16 + store i64 %agg.tmp.i.sroa.8.1, ptr addrspace(3) @WGCopy.1.3, align 16 + store i32 %agg.tmp.i.sroa.9.1, ptr addrspace(3) @WGCopy.1.4, align 16 + store i32 %agg.tmp.i.sroa.10.0, ptr addrspace(3) @WGCopy.1.5, align 16 + store ptr addrspace(4) %agg.tmp.i.sroa.11.1, ptr addrspace(3) @WGCopy.1.6, align 16 + store ptr addrspace(4) %this.addr.0.i, ptr addrspace(3) @WGCopy, align 8 + br label %LeaderMat.i + +LeaderMat.i: ; preds = %TestMat.i, %wg_cf8.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %mat_ld13.i = load ptr addrspace(4), ptr addrspace(3) @WGCopy, align 8 + %agg.tmp.i.sroa.0.0.copyload13 = load i64, ptr addrspace(3) @WGCopy.1.0, align 16 + %agg.tmp.i.sroa.6.0.copyload15 = load i64, ptr addrspace(3) @WGCopy.1.1, align 16 + %agg.tmp.i.sroa.7.0.copyload17 = load i64, ptr addrspace(3) @WGCopy.1.2, align 16 + 
%agg.tmp.i.sroa.8.0.copyload19 = load i64, ptr addrspace(3) @WGCopy.1.3, align 16 + %agg.tmp.i.sroa.9.0.copyload21 = load i32, ptr addrspace(3) @WGCopy.1.4, align 16 + %agg.tmp.i.sroa.10.0.copyload23 = load i32, ptr addrspace(3) @WGCopy.1.5, align 16 + %agg.tmp.i.sroa.11.0.copyload25 = load ptr addrspace(4), ptr addrspace(3) @WGCopy.1.6, align 16 + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) + %cmp.not.i.i = icmp ult i64 %13, %agg.tmp.i.sroa.0.0.copyload13 + br i1 %cmp.not.i.i, label %if.end.i.i, label %lexit1 + +if.end.i.i: ; preds = %LeaderMat.i + %add.i.i = add i64 %agg.tmp.i.sroa.0.0.copyload13, %agg.tmp.i.sroa.6.0.copyload15 + %sub.i.i = add i64 %add.i.i, -1 + %div.i.i = udiv i64 %sub.i.i, %agg.tmp.i.sroa.6.0.copyload15 + %mul.i.i = mul i64 %13, %div.i.i + %add4.i.i = add i64 %agg.tmp.i.sroa.7.0.copyload17, %mul.i.i + %add6.i.i = add i64 %add4.i.i, %div.i.i + %.sroa.speculated.i.i = call i64 @llvm.umin.i64(i64 %agg.tmp.i.sroa.8.0.copyload19, i64 %add6.i.i) + %16 = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp.i.sroa.11.0.copyload25, i64 24 + br label %for.cond.i.i + +for.cond.i.i: ; preds = %for.body.i.i, %if.end.i.i + %ind.0.i.i = phi i64 [ %add4.i.i, %if.end.i.i ], [ %inc.i.i, %for.body.i.i ] + %cmp8.i.i = icmp ult i64 %ind.0.i.i, %.sroa.speculated.i.i + br i1 %cmp8.i.i, label %for.body.i.i, label %lexit1 + +for.body.i.i: ; preds = %for.cond.i.i + %17 = load ptr addrspace(1), ptr addrspace(4) %16, align 8 + %arrayidx.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %17, i64 %ind.0.i.i + %18 = load i32, ptr addrspace(1) %arrayidx.i.i.i, align 4 + %add10.i.i = add nsw i32 %18, %agg.tmp.i.sroa.9.0.copyload21 + store i32 %add10.i.i, ptr addrspace(1) %arrayidx.i.i.i, align 4 + %inc.i.i = add nuw i64 %ind.0.i.i, 1 + br label %for.cond.i.i + +lexit1: ; preds = %for.cond.i.i, %LeaderMat.i + call spir_func void 
@_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz15.i, label %wg_leader10.i, label %wg_cf11.i + +wg_leader10.i: ; preds = %lexit1 + %19 = load i32, ptr addrspace(3) @GCnt, align 4 + %inc.i = add nsw i32 %19, 1 + store i32 %inc.i, ptr addrspace(3) @GCnt, align 4 + br label %wg_cf11.i + +wg_cf11.i: ; preds = %wg_leader10.i, %lexit1 + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br label %for.cond.i + +_ZNK11PFWGFunctorclEN4sycl3_V15groupILi1EEE.exit: ; preds = %wg_cf5.i + call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %agg.tmp67) + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +; Function Attrs: convergent nounwind +declare dso_local spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef, i32 noundef, i32 noundef) + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p0.p3.i64(ptr noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.umin.i64(i64, i64) + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +; Function Attrs: convergent mustprogress norecurse nounwind +define weak_odr dso_local spir_kernel void @bar(ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") 
align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) { +entry: + %agg.tmp67 = alloca %"class.sycl::_V1::group", align 8 + %0 = load i64, ptr %_arg_dev_ptr1, align 8 + %1 = load i64, ptr %_arg_dev_ptr2, align 8 + %2 = load i64, ptr %_arg_dev_ptr3, align 8 + store i64 %2, ptr addrspace(3) @GKernel1, align 8 + store i64 %0, ptr addrspace(3) undef, align 8 + store i64 %1, ptr addrspace(3) undef, align 8 + %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2 + store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8 + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32 + %4 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32 + %5 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32 + %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32 + call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %agg.tmp67) + store i64 %3, ptr %agg.tmp67, align 1 + %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8 + store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1 + %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16 + store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1 + %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24 + store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1 + %7 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8 + tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %cmpz27.i = icmp eq i64 %7, 0 + br i1 %cmpz27.i, label %leader.i, label %merge.i + +leader.i: ; preds = %entry + call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.7, ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, i64 32, i1 false) + br label %merge.i + +merge.i: ; preds = %leader.i, %entry + tail call 
spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.7, i64 32, i1 false) + tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz27.i, label %wg_leader.i, label %wg_cf.i + +wg_leader.i: ; preds = %merge.i + %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4) + store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAsCast2, align 8 + store i32 0, ptr addrspace(3) @GCnt2, align 4 + br label %wg_cf.i + +wg_cf.i: ; preds = %wg_leader.i, %merge.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %8 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32 + %9 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 + %cmp.i.i.i.i.i.i = icmp ult i64 %8, 2147483648 + br label %for.cond.i + +for.cond.i: ; preds = %wg_cf18.i, %wg_cf.i + %agg.tmp5.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %18, %wg_cf18.i ] + %agg.tmp4.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %17, %wg_cf18.i ] + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz27.i, label %wg_leader8.i, label %wg_cf9.i + +wg_leader8.i: ; preds = %for.cond.i + %10 = load i32, ptr addrspace(3) @GCnt2, align 4 + %cmp.i = icmp slt i32 %10, 2 + store i1 %cmp.i, ptr addrspace(3) @GCmp2, align 1 + br label %wg_cf9.i + +wg_cf9.i: ; preds = %wg_leader8.i, %for.cond.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp2, align 1 + br i1 %wg_val_cmp.i, label %for.body.i, label %lexit2 + +for.body.i: ; preds = %wg_cf9.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz27.i, label %TestMat25.i, label %LeaderMat22.i + 
+TestMat25.i: ; preds = %for.body.i + store i64 %agg.tmp5.i.sroa.0.0, ptr addrspace(3) @WGCopy.6.0, align 8 + store i64 ptrtoint (ptr addrspace(4) addrspacecast (ptr addrspace(3) @GKernel1 to ptr addrspace(4)) to i64), ptr addrspace(3) @WGCopy.4.0, align 8 + store i64 5, ptr addrspace(3) @WGCopy.3.0, align 8 + store i64 %agg.tmp4.i.sroa.0.0, ptr addrspace(3) @WGCopy.5.0, align 8 + br label %LeaderMat22.i + +LeaderMat22.i: ; preds = %TestMat25.i, %for.body.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %11 = load i64, ptr addrspace(3) @WGCopy.3.0, align 8 + %12 = load i64, ptr addrspace(3) @WGCopy.4.0, align 8 + %13 = inttoptr i64 %12 to ptr addrspace(4) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) + %14 = getelementptr inbounds i8, ptr addrspace(4) %13, i64 24 + br label %for.cond.i.i + +for.cond.i.i: ; preds = %for.body.i.i, %LeaderMat22.i + %storemerge.i.i = phi i64 [ %9, %LeaderMat22.i ], [ %add.i.i, %for.body.i.i ] + %cmp.i.i = icmp ult i64 %storemerge.i.i, %11 + br i1 %cmp.i.i, label %for.body.i.i, label %lexit3 + +for.body.i.i: ; preds = %for.cond.i.i + call void @llvm.assume(i1 %cmp.i.i.i.i.i.i) + %15 = load ptr addrspace(1), ptr addrspace(4) %14, align 8 + %arrayidx.i.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %15, i64 %8 + %16 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4 + %inc.i.i.i.i = add nsw i32 %16, 1 + store i32 %inc.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4 + %add.i.i = add i64 %storemerge.i.i, %4 + br label %for.cond.i.i + +lexit3: ; preds = %for.cond.i.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz27.i, label %TestMat.i, label 
%LeaderMat.i + +TestMat.i: ; preds = %lexit3 + store i64 ptrtoint (ptr addrspace(4) addrspacecast (ptr addrspace(3) @GKernel1 to ptr addrspace(4)) to i64), ptr addrspace(3) @WGCopy.6.0, align 8 + store i64 %12, ptr addrspace(3) @WGCopy.4.0, align 8 + store i64 %11, ptr addrspace(3) @WGCopy.3.0, align 8 + store i64 2, ptr addrspace(3) @WGCopy.5.0, align 8 + br label %LeaderMat.i + +LeaderMat.i: ; preds = %TestMat.i, %lexit3 + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %17 = load i64, ptr addrspace(3) @WGCopy.5.0, align 8 + %18 = load i64, ptr addrspace(3) @WGCopy.6.0, align 8 + %19 = inttoptr i64 %18 to ptr addrspace(4) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) + %20 = getelementptr inbounds i8, ptr addrspace(4) %19, i64 24 + br label %for.cond.i.i19 + +for.cond.i.i19: ; preds = %for.body.i.i22, %LeaderMat.i + %storemerge.i.i20 = phi i64 [ %9, %LeaderMat.i ], [ %add.i.i26, %for.body.i.i22 ] + %cmp.i.i21 = icmp ult i64 %storemerge.i.i20, %17 + br i1 %cmp.i.i21, label %for.body.i.i22, label %lexit4 + +for.body.i.i22: ; preds = %for.cond.i.i19 + call void @llvm.assume(i1 %cmp.i.i.i.i.i.i) + %21 = load ptr addrspace(1), ptr addrspace(4) %20, align 8 + %arrayidx.i.i.i.i.i23 = getelementptr inbounds i32, ptr addrspace(1) %21, i64 %8 + %22 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i23, align 4 + %inc.i.i.i.i25 = add nsw i32 %22, 1 + store i32 %inc.i.i.i.i25, ptr addrspace(1) %arrayidx.i.i.i.i.i23, align 4 + %add.i.i26 = add i64 %storemerge.i.i20, %4 + br label %for.cond.i.i19 + +lexit4: ; preds = %for.cond.i.i19 + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz27.i, label %wg_leader17.i, label %wg_cf18.i + +wg_leader17.i: ; preds = %lexit4 + %23 = load i32, ptr 
addrspace(3) @GCnt2, align 4 + %inc.i = add nsw i32 %23, 1 + store i32 %inc.i, ptr addrspace(3) @GCnt2, align 4 + br label %wg_cf18.i + +wg_cf18.i: ; preds = %wg_leader17.i, %lexit4 + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br label %for.cond.i + +lexit2: ; preds = %wg_cf9.i + call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %agg.tmp67) + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) +declare void @llvm.assume(i1 noundef) + +; Function Attrs: convergent mustprogress norecurse nounwind +define weak_odr dso_local spir_kernel void @test1(ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) { +entry: + %agg.tmp67 = alloca %"class.sycl::_V1::group", align 8 + %0 = load i64, ptr %_arg_dev_ptr1, align 8 + %1 = load i64, ptr %_arg_dev_ptr2, align 8 + %2 = load i64, ptr %_arg_dev_ptr3, align 8 + store i64 %2, ptr addrspace(3) @GKernel2, align 8 + store i64 %0, ptr addrspace(3) undef, align 8 + store i64 %1, ptr addrspace(3) undef, align 8 + %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2 + store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8 + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32 + %4 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32 + %5 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32 + %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32 + call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %agg.tmp67) + store i64 %3, ptr %agg.tmp67, align 1 + %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8 + store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1 + %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = 
getelementptr inbounds i8, ptr %agg.tmp67, i64 16 + store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1 + %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24 + store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1 + %7 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8 + tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %cmpz15.i = icmp eq i64 %7, 0 + br i1 %cmpz15.i, label %leader.i, label %merge.i + +leader.i: ; preds = %entry + call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.11, ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, i64 32, i1 false) + br label %merge.i + +merge.i: ; preds = %leader.i, %entry + tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.11, i64 32, i1 false) + tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz15.i, label %wg_leader.i, label %wg_cf.i + +wg_leader.i: ; preds = %merge.i + %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4) + store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAscast3, align 8 + store i32 0, ptr addrspace(3) @GCnt3, align 4 + br label %wg_cf.i + +wg_cf.i: ; preds = %wg_leader.i, %merge.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %8 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32 + %9 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 + %cmp.i.i.i.i.i.i = icmp ult i64 %8, 2147483648 + br label %for.cond.i + +for.cond.i: ; preds = %wg_cf11.i, %wg_cf.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz15.i, label %wg_leader4.i, label %wg_cf5.i + +wg_leader4.i: ; preds = %for.cond.i 
+ %10 = load i32, ptr addrspace(3) @GCnt3, align 4 + %cmp.i = icmp slt i32 %10, 2 + store i1 %cmp.i, ptr addrspace(3) @GCmp3, align 1 + br label %wg_cf5.i + +wg_cf5.i: ; preds = %wg_leader4.i, %for.cond.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp3, align 1 + br i1 %wg_val_cmp.i, label %for.body.i, label %lexit6 + +for.body.i: ; preds = %wg_cf5.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz15.i, label %TestMat.i, label %LeaderMat.i + +TestMat.i: ; preds = %for.body.i + store i64 ptrtoint (ptr addrspace(4) addrspacecast (ptr addrspace(3) @GKernel2 to ptr addrspace(4)) to i64), ptr addrspace(3) @WGCopy.10.0, align 8 + store i64 5, ptr addrspace(3) @WGCopy.9.0, align 8 + br label %LeaderMat.i + +LeaderMat.i: ; preds = %TestMat.i, %for.body.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %11 = load i64, ptr addrspace(3) @WGCopy.9.0, align 8 + %12 = load i64, ptr addrspace(3) @WGCopy.10.0, align 8 + %13 = inttoptr i64 %12 to ptr addrspace(4) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) + %14 = getelementptr inbounds i8, ptr addrspace(4) %13, i64 24 + br label %for.cond.i.i + +for.cond.i.i: ; preds = %for.body.i.i, %LeaderMat.i + %storemerge.i.i = phi i64 [ %9, %LeaderMat.i ], [ %add.i.i, %for.body.i.i ] + %cmp.i.i = icmp ult i64 %storemerge.i.i, %11 + br i1 %cmp.i.i, label %for.body.i.i, label %lexit7 + +for.body.i.i: ; preds = %for.cond.i.i + %cmp5.not.i.i.i.i.i.i = icmp ne i64 %storemerge.i.i, %9 + %cond.i.i.i.i = zext i1 %cmp5.not.i.i.i.i.i.i to i32 + call void @llvm.assume(i1 %cmp.i.i.i.i.i.i) + %15 = load ptr addrspace(1), ptr addrspace(4) %14, align 8 + %arrayidx.i.i.i.i.i = getelementptr inbounds i32, 
ptr addrspace(1) %15, i64 %8 + %16 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4 + %add.i.i.i.i = add nsw i32 %16, %cond.i.i.i.i + store i32 %add.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4 + %add.i.i = add i64 %storemerge.i.i, %4 + br label %for.cond.i.i + +lexit7: ; preds = %for.cond.i.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz15.i, label %wg_leader10.i, label %wg_cf11.i + +wg_leader10.i: ; preds = %lexit7 + %17 = load i32, ptr addrspace(3) @GCnt3, align 4 + %inc.i = add nsw i32 %17, 1 + store i32 %inc.i, ptr addrspace(3) @GCnt3, align 4 + br label %wg_cf11.i + +wg_cf11.i: ; preds = %wg_leader10.i, %lexit7 + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br label %for.cond.i + +lexit6: ; preds = %wg_cf5.i + call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %agg.tmp67) + ret void +} + +; Function Attrs: convergent mustprogress norecurse nounwind +define weak_odr dso_local spir_kernel void @test2(ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) { +entry: + %priv.i = alloca %"class.sycl::_V1::private_memory", align 4 + %agg.tmp67 = alloca %"class.sycl::_V1::group.15", align 8 + %0 = load i64, ptr %_arg_dev_ptr1, align 8 + %1 = load i64, ptr %_arg_dev_ptr2, align 8 + %2 = load i64, ptr %_arg_dev_ptr3, align 8 + store i64 %2, ptr addrspace(3) @GKernel3, align 8 + store i64 %0, ptr addrspace(3) undef, align 8 + store i64 %1, ptr addrspace(3) undef, align 8 + %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2 + store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8 + %3 = load i64, ptr addrspace(1) undef, align 8 + %4 = load 
i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32 + %5 = load i64, ptr addrspace(1) undef, align 8 + %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32 + %7 = load i64, ptr addrspace(1) undef, align 8 + %8 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32 + %9 = load i64, ptr addrspace(1) undef, align 8 + %10 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32 + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %agg.tmp67) + store i64 %3, ptr %agg.tmp67, align 1 + %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8 + store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1 + %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16 + store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1 + %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24 + store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1 + %agg.tmp6.sroa.5.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 32 + store i64 %7, ptr %agg.tmp6.sroa.5.0.agg.tmp67.sroa_idx, align 1 + %agg.tmp6.sroa.6.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 40 + store i64 %8, ptr %agg.tmp6.sroa.6.0.agg.tmp67.sroa_idx, align 1 + %agg.tmp6.sroa.7.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 48 + store i64 %9, ptr %agg.tmp6.sroa.7.0.agg.tmp67.sroa_idx, align 1 + %agg.tmp6.sroa.8.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 56 + store i64 %10, ptr %agg.tmp6.sroa.8.0.agg.tmp67.sroa_idx, align 1 + %11 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8 + tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %cmpz32.i = icmp eq i64 %11, 0 + br i1 %cmpz32.i, label %leader.i, label %merge.i + +leader.i: ; preds = %entry + call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 
dereferenceable(64) @ArgShadow.17, ptr noundef nonnull align 8 dereferenceable(64) %agg.tmp67, i64 64, i1 false) + br label %merge.i + +merge.i: ; preds = %leader.i, %entry + tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(64) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(64) @ArgShadow.17, i64 64, i1 false) + %priv.ascast.i = addrspacecast ptr %priv.i to ptr addrspace(4) + tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz32.i, label %wg_leader.i, label %wg_cf.i + +wg_leader.i: ; preds = %merge.i + %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4) + store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAsCast4, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %priv.i) + store i32 0, ptr addrspace(3) @GCnt4, align 4 + br label %wg_cf.i + +wg_cf.i: ; preds = %wg_leader.i, %merge.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %12 = load i64, ptr addrspace(1) undef, align 8 + %13 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32 + %14 = load i64, ptr addrspace(1) undef, align 8 + %15 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 + %mul.i.i.i.i.i.i = mul i64 %12, %4 + %add.i.i.i.i.i.i = add i64 %mul.i.i.i.i.i.i, %13 + %cmp.i.i.i.i.i.i = icmp ult i64 %add.i.i.i.i.i.i, 2147483648 + %conv.i.i.i.i.i = trunc i64 %add.i.i.i.i.i.i to i32 + %y.i.i.i.i.i = getelementptr inbounds i8, ptr %priv.i, i64 4 + br label %for.cond.i + +for.cond.i: ; preds = %wg_cf20.i, %wg_cf.i + %agg.tmp6.i.sroa.9.0 = phi ptr addrspace(4) [ undef, %wg_cf.i ], [ %agg.tmp6.i.sroa.9.0.copyload40, %wg_cf20.i ] + %agg.tmp5.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp5.i.sroa.0.0.copyload44, %wg_cf20.i ] + %agg.tmp5.i.sroa.8.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp5.i.sroa.8.0.copyload48, %wg_cf20.i ] + 
%agg.tmp2.i.sroa.0.0 = phi ptr addrspace(4) [ undef, %wg_cf.i ], [ %agg.tmp2.i.sroa.0.0.copyload52, %wg_cf20.i ] + %agg.tmp2.i.sroa.8.0 = phi ptr addrspace(4) [ undef, %wg_cf.i ], [ %agg.tmp2.i.sroa.8.0.copyload56, %wg_cf20.i ] + %agg.tmp.i.sroa.0.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.0.0.copyload60, %wg_cf20.i ] + %agg.tmp.i.sroa.8.0 = phi i64 [ undef, %wg_cf.i ], [ %agg.tmp.i.sroa.8.0.copyload64, %wg_cf20.i ] + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz32.i, label %wg_leader10.i, label %wg_cf11.i + +wg_leader10.i: ; preds = %for.cond.i + %16 = load i32, ptr addrspace(3) @GCnt4, align 4 + %cmp.i = icmp slt i32 %16, 2 + store i1 %cmp.i, ptr addrspace(3) @GCmp4, align 1 + br label %wg_cf11.i + +wg_cf11.i: ; preds = %wg_leader10.i, %for.cond.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp4, align 1 + br i1 %wg_val_cmp.i, label %for.body.i, label %for.end.i + +for.body.i: ; preds = %wg_cf11.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz32.i, label %wg_leader13.i, label %wg_cf14.i + +wg_leader13.i: ; preds = %for.body.i + br label %wg_cf14.i + +wg_cf14.i: ; preds = %wg_leader13.i, %for.body.i + %agg.tmp2.i.sroa.0.1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @GKernel3 to ptr addrspace(4)), %wg_leader13.i ], [ %agg.tmp2.i.sroa.0.0, %for.body.i ] + %agg.tmp2.i.sroa.8.1 = phi ptr addrspace(4) [ %priv.ascast.i, %wg_leader13.i ], [ %agg.tmp2.i.sroa.8.0, %for.body.i ] + %agg.tmp.i.sroa.0.1 = phi i64 [ 7, %wg_leader13.i ], [ %agg.tmp.i.sroa.0.0, %for.body.i ] + %agg.tmp.i.sroa.8.1 = phi i64 [ 3, %wg_leader13.i ], [ %agg.tmp.i.sroa.8.0, %for.body.i ] + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz32.i, label %TestMat30.i, label %LeaderMat27.i + +TestMat30.i: ; preds = %wg_cf14.i + store i64 %agg.tmp.i.sroa.0.1, ptr addrspace(3) @WGCopy.13.0, 
align 8 + store i64 %agg.tmp.i.sroa.8.1, ptr addrspace(3) @WGCopy.13.1, align 8 + store ptr addrspace(4) %agg.tmp2.i.sroa.0.1, ptr addrspace(3) @WGCopy.14.0, align 8 + store ptr addrspace(4) %agg.tmp2.i.sroa.8.1, ptr addrspace(3) @WGCopy.14.1, align 8 + store i64 %agg.tmp5.i.sroa.0.0, ptr addrspace(3) @WGCopy.15.0, align 8 + store i64 %agg.tmp5.i.sroa.8.0, ptr addrspace(3) @WGCopy.15.1, align 8 + store ptr addrspace(4) %priv.ascast.i, ptr addrspace(3) @WGCopy.16.0, align 8 + store ptr addrspace(4) %agg.tmp6.i.sroa.9.0, ptr addrspace(3) @WGCopy.16.1, align 8 + br label %LeaderMat27.i + +LeaderMat27.i: ; preds = %TestMat30.i, %wg_cf14.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %agg.tmp6.i.sroa.0.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.16.0, align 8 + %agg.tmp6.i.sroa.9.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.16.1, align 8 + %agg.tmp5.i.sroa.0.0.copyload = load i64, ptr addrspace(3) @WGCopy.15.0, align 8 + %agg.tmp5.i.sroa.8.0.copyload = load i64, ptr addrspace(3) @WGCopy.15.1, align 8 + %agg.tmp2.i.sroa.0.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.14.0, align 8 + %agg.tmp.i.sroa.0.0.copyload = load i64, ptr addrspace(3) @WGCopy.13.0, align 8 + %agg.tmp.i.sroa.8.0.copyload = load i64, ptr addrspace(3) @WGCopy.13.1, align 8 + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) + %17 = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp2.i.sroa.0.0.copyload, i64 24 + br label %for.cond.i.i + +for.cond.i.i: ; preds = %lexit10, %LeaderMat27.i + %storemerge.i.i = phi i64 [ %14, %LeaderMat27.i ], [ %add.i.i, %lexit10 ] + %cmp.i.i = icmp ult i64 %storemerge.i.i, %agg.tmp.i.sroa.0.0.copyload + br i1 %cmp.i.i, label %for.cond.i.i.i, label %lexit11 + +for.cond.i.i.i: ; preds = %for.body.i.i.i, %for.cond.i.i + %storemerge.i.i.i = phi i64 [ %add.i.i.i, 
%for.body.i.i.i ], [ %15, %for.cond.i.i ] + %cmp.i.i.i = icmp ult i64 %storemerge.i.i.i, %agg.tmp.i.sroa.8.0.copyload + br i1 %cmp.i.i.i, label %for.body.i.i.i, label %lexit10 + +for.body.i.i.i: ; preds = %for.cond.i.i.i + call void @llvm.assume(i1 %cmp.i.i.i.i.i.i) + %18 = load ptr addrspace(1), ptr addrspace(4) %17, align 8 + %arrayidx.i.i.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %18, i64 %add.i.i.i.i.i.i + %19 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i.i, align 4 + %inc.i.i.i.i.i = add nsw i32 %19, 1 + store i32 %inc.i.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i.i, align 4 + store i32 %conv.i.i.i.i.i, ptr %priv.i, align 4 + store i32 5, ptr %y.i.i.i.i.i, align 4 + %add.i.i.i = add i64 %storemerge.i.i.i, %6 + br label %for.cond.i.i.i + +lexit10: ; preds = %for.cond.i.i.i + %add.i.i = add i64 %storemerge.i.i, %5 + br label %for.cond.i.i + +lexit11: ; preds = %for.cond.i.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz32.i, label %wg_leader16.i, label %wg_cf17.i + +wg_leader16.i: ; preds = %lexit11 + br label %wg_cf17.i + +wg_cf17.i: ; preds = %wg_leader16.i, %lexit11 + %agg.tmp6.i.sroa.0.1 = phi ptr addrspace(4) [ %priv.ascast.i, %wg_leader16.i ], [ %agg.tmp6.i.sroa.0.0.copyload, %lexit11 ] + %agg.tmp6.i.sroa.9.1 = phi ptr addrspace(4) [ addrspacecast (ptr addrspace(3) @GKernel3 to ptr addrspace(4)), %wg_leader16.i ], [ %agg.tmp6.i.sroa.9.0.copyload, %lexit11 ] + %agg.tmp5.i.sroa.0.1 = phi i64 [ 7, %wg_leader16.i ], [ %agg.tmp5.i.sroa.0.0.copyload, %lexit11 ] + %agg.tmp5.i.sroa.8.1 = phi i64 [ 3, %wg_leader16.i ], [ %agg.tmp5.i.sroa.8.0.copyload, %lexit11 ] + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz32.i, label %TestMat.i, label %LeaderMat.i + +TestMat.i: ; preds = %wg_cf17.i + store i64 %agg.tmp.i.sroa.0.0.copyload, ptr addrspace(3) @WGCopy.13.0, align 8 + 
store i64 %agg.tmp.i.sroa.8.0.copyload, ptr addrspace(3) @WGCopy.13.1, align 8 + store ptr addrspace(4) %agg.tmp2.i.sroa.0.0.copyload, ptr addrspace(3) @WGCopy.14.0, align 8 + store ptr addrspace(4) %priv.ascast.i, ptr addrspace(3) @WGCopy.14.1, align 8 + store i64 %agg.tmp5.i.sroa.0.1, ptr addrspace(3) @WGCopy.15.0, align 8 + store i64 %agg.tmp5.i.sroa.8.1, ptr addrspace(3) @WGCopy.15.1, align 8 + store ptr addrspace(4) %agg.tmp6.i.sroa.0.1, ptr addrspace(3) @WGCopy.16.0, align 8 + store ptr addrspace(4) %agg.tmp6.i.sroa.9.1, ptr addrspace(3) @WGCopy.16.1, align 8 + br label %LeaderMat.i + +LeaderMat.i: ; preds = %TestMat.i, %wg_cf17.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + %agg.tmp6.i.sroa.9.0.copyload40 = load ptr addrspace(4), ptr addrspace(3) @WGCopy.16.1, align 8 + %agg.tmp5.i.sroa.0.0.copyload44 = load i64, ptr addrspace(3) @WGCopy.15.0, align 8 + %agg.tmp5.i.sroa.8.0.copyload48 = load i64, ptr addrspace(3) @WGCopy.15.1, align 8 + %agg.tmp2.i.sroa.0.0.copyload52 = load ptr addrspace(4), ptr addrspace(3) @WGCopy.14.0, align 8 + %agg.tmp2.i.sroa.8.0.copyload56 = load ptr addrspace(4), ptr addrspace(3) @WGCopy.14.1, align 8 + %agg.tmp.i.sroa.0.0.copyload60 = load i64, ptr addrspace(3) @WGCopy.13.0, align 8 + %agg.tmp.i.sroa.8.0.copyload64 = load i64, ptr addrspace(3) @WGCopy.13.1, align 8 + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) + %20 = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp6.i.sroa.9.0.copyload40, i64 24 + br label %for.cond.i.i25 + +for.cond.i.i25: ; preds = %lexit12, %LeaderMat.i + %storemerge.i.i26 = phi i64 [ %14, %LeaderMat.i ], [ %add.i.i31, %lexit12 ] + %cmp.i.i27 = icmp ult i64 %storemerge.i.i26, %agg.tmp5.i.sroa.0.0.copyload44 + br i1 %cmp.i.i27, label %for.cond.i.i.i28, label %lexit13 + +for.cond.i.i.i28: ; preds = %for.body.i.i.i32, %for.cond.i.i25 + 
%storemerge.i.i.i29 = phi i64 [ %add.i.i.i35, %for.body.i.i.i32 ], [ %15, %for.cond.i.i25 ] + %cmp.i.i.i30 = icmp ult i64 %storemerge.i.i.i29, %agg.tmp5.i.sroa.8.0.copyload48 + br i1 %cmp.i.i.i30, label %for.body.i.i.i32, label %lexit12 + +for.body.i.i.i32: ; preds = %for.cond.i.i.i28 + %21 = load i32, ptr %priv.i, align 4 + %22 = load i32, ptr %y.i.i.i.i.i, align 4 + %add.i.i.i.i.i = add nsw i32 %21, %22 + call void @llvm.assume(i1 %cmp.i.i.i.i.i.i) + %23 = load ptr addrspace(1), ptr addrspace(4) %20, align 8 + %arrayidx.i.i.i.i.i.i33 = getelementptr inbounds i32, ptr addrspace(1) %23, i64 %add.i.i.i.i.i.i + %24 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i.i33, align 4 + %add4.i.i.i.i.i = add nsw i32 %24, %add.i.i.i.i.i + store i32 %add4.i.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i.i33, align 4 + %add.i.i.i35 = add i64 %storemerge.i.i.i29, %6 + br label %for.cond.i.i.i28 + +lexit12: ; preds = %for.cond.i.i.i28 + %add.i.i31 = add i64 %storemerge.i.i26, %5 + br label %for.cond.i.i25 + +lexit13: ; preds = %for.cond.i.i25 + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272) + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz32.i, label %wg_leader19.i, label %wg_cf20.i + +wg_leader19.i: ; preds = %lexit13 + %25 = load i32, ptr addrspace(3) @GCnt4, align 4 + %inc.i = add nsw i32 %25, 1 + store i32 %inc.i, ptr addrspace(3) @GCnt4, align 4 + br label %wg_cf20.i + +wg_cf20.i: ; preds = %wg_leader19.i, %lexit13 + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br label %for.cond.i + +for.end.i: ; preds = %wg_cf11.i + call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) + br i1 %cmpz32.i, label %wg_leader22.i, label %lexit14 + +wg_leader22.i: ; preds = %for.end.i + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %priv.i) + br label %lexit14 + +lexit14: ; preds = %wg_leader22.i, %for.end.i + call spir_func void 
@_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %agg.tmp67)
+  ret void
+}
+
+; Function Attrs: convergent mustprogress norecurse nounwind
+define weak_odr dso_local spir_kernel void @test3(ptr addrspace(1) noundef align 4 %_arg_dev_ptr, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr2, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_dev_ptr3) { ; kernel: leader-materialized work-group loop over a global i32 buffer
+entry:
+  %agg.tmp67 = alloca %"class.sycl::_V1::group", align 8 ; 32-byte group object; filled from the four builtin loads below (lifetime.start uses i64 32)
+  %0 = load i64, ptr %_arg_dev_ptr1, align 8
+  %1 = load i64, ptr %_arg_dev_ptr2, align 8
+  %2 = load i64, ptr %_arg_dev_ptr3, align 8
+  store i64 %2, ptr addrspace(3) @GKernel4, align 8 ; third range value is published to local (addrspace 3) storage
+  store i64 %0, ptr addrspace(3) undef, align 8
+  store i64 %1, ptr addrspace(3) undef, align 8
+  %add.ptr.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_dev_ptr, i64 %2
+  store ptr addrspace(1) %add.ptr.i, ptr addrspace(3) undef, align 8
+  %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalSize, align 32
+  %4 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupSize, align 32
+  %5 = load i64, ptr addrspace(1) @__spirv_BuiltInNumWorkgroups, align 32
+  %6 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32
+  call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %agg.tmp67) ; matches the 32-byte alloca; paired lifetime.end is in %lexit20
+  store i64 %3, ptr %agg.tmp67, align 1 ; pack the four builtins into the group object at offsets 0/8/16/24
+  %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 8
+  store i64 %4, ptr %agg.tmp6.sroa.2.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 16
+  store i64 %5, ptr %agg.tmp6.sroa.3.0.agg.tmp67.sroa_idx, align 1
+  %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx = getelementptr inbounds i8, ptr %agg.tmp67, i64 24
+  store i64 %6, ptr %agg.tmp6.sroa.4.0.agg.tmp67.sroa_idx, align 1
+  %7 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationIndex, align 8
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %cmpz16.i = icmp eq i64 %7, 0 ; true only for the work-group leader (local invocation index 0)
+  br i1 %cmpz16.i, label %leader.i, label %merge.i
+
+leader.i: ; preds = %entry
+  call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.21, ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, i64 32, i1 false) ; leader publishes its group object to the local shadow copy
+  br label %merge.i
+
+merge.i: ; preds = %leader.i, %entry
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call void @llvm.memcpy.p0.p3.i64(ptr noundef nonnull align 8 dereferenceable(32) %agg.tmp67, ptr addrspace(3) noundef align 16 dereferenceable(32) @ArgShadow.21, i64 32, i1 false) ; every work-item reads the leader's copy back
+  tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz16.i, label %wg_leader.i, label %wg_cf.i
+
+wg_leader.i: ; preds = %merge.i
+  %g.ascast.i = addrspacecast ptr %agg.tmp67 to ptr addrspace(4)
+  store ptr addrspace(4) %g.ascast.i, ptr addrspace(3) @GAsCast5, align 8
+  store i32 0, ptr addrspace(3) @GCnt5, align 4 ; leader initializes the shared loop counter
+  br label %wg_cf.i
+
+wg_cf.i: ; preds = %wg_leader.i, %merge.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %wg_val_g.ascast.i = load ptr addrspace(4), ptr addrspace(3) @GAsCast5, align 8
+  %8 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32
+  %9 = trunc i64 %4 to i32
+  br label %for.cond.i
+
+for.cond.i: ; preds = %wg_cf12.i, %wg_cf.i ; outer loop: continues while leader-computed @GCnt5 < 2 (see %wg_leader5.i / %wg_leader11.i)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz16.i, label %wg_leader5.i, label %wg_cf6.i
+
+wg_leader5.i: ; preds = %for.cond.i
+  %10 = load i32, ptr addrspace(3) @GCnt5, align 4
+  %cmp.i = icmp slt i32 %10, 2
+  store i1 %cmp.i, ptr addrspace(3) @GCmp5, align 1 ; leader broadcasts the loop condition through local memory
+  br label %wg_cf6.i
+
+wg_cf6.i: ; preds = %wg_leader5.i, %for.cond.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %wg_val_cmp.i = load i1, ptr addrspace(3) @GCmp5, align 1
+  br i1 %wg_val_cmp.i, label %for.body.i, label %lexit20
+
+for.body.i: ; preds = %wg_cf6.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz16.i, label %TestMat.i, label %LeaderMat.i
+
+TestMat.i: ; preds = %for.body.i ; leader stores captured values into WG copy slots for materialization
+  store ptr addrspace(4) %wg_val_g.ascast.i, ptr addrspace(3) @WGCopy.20.0, align 8
+  store ptr addrspace(4) addrspacecast (ptr addrspace(3) @GKernel4 to ptr addrspace(4)), ptr addrspace(3) @WGCopy.20.1, align 8
+  store i64 5, ptr addrspace(3) @WGCopy.19.0, align 8
+  br label %LeaderMat.i
+
+LeaderMat.i: ; preds = %TestMat.i, %for.body.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  %11 = load i64, ptr addrspace(3) @WGCopy.19.0, align 8 ; trip count (leader wrote 5)
+  %agg.tmp2.i.sroa.0.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.20.0, align 8
+  %agg.tmp2.i.sroa.6.0.copyload = load ptr addrspace(4), ptr addrspace(3) @WGCopy.20.1, align 8
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  %index.i.i.i.i.i = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp2.i.sroa.0.0.copyload, i64 24 ; offset 24 = work-group id field stored in entry
+  %12 = getelementptr inbounds i8, ptr addrspace(4) %agg.tmp2.i.sroa.6.0.copyload, i64 24
+  %13 = trunc i64 %11 to i32
+  br label %for.cond.i.i
+
+for.cond.i.i: ; preds = %for.body.i.i, %LeaderMat.i ; inner strided loop: i = local id (%8); i < %11; i += workgroup size (%4)
+  %storemerge.i.i = phi i64 [ %8, %LeaderMat.i ], [ %add.i.i, %for.body.i.i ]
+  %cmp.i.i = icmp ult i64 %storemerge.i.i, %11
+  br i1 %cmp.i.i, label %for.body.i.i, label %lexit21
+
+for.body.i.i: ; preds = %for.cond.i.i
+  %14 = load i64, ptr addrspace(4) %index.i.i.i.i.i, align 8
+  %mul.i.i.i.i = mul i64 %14, 10
+  %mul3.i.i.i.i = shl i64 %storemerge.i.i, 1
+  %add.i.i.i.i = add i64 %mul.i.i.i.i, %mul3.i.i.i.i ; element index = 10 * group-field + 2 * i
+  %15 = load ptr addrspace(1), ptr addrspace(4) %12, align 8
+  %arrayidx.i.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %15, i64 %add.i.i.i.i
+  %16 = load i32, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+  %conv9.i.i.i.i = add i32 %16, %13 ; buf[idx] += trip count
+  store i32 %conv9.i.i.i.i, ptr addrspace(1) %arrayidx.i.i.i.i.i, align 4
+  %add14.i.i.i.i = or disjoint i64 %add.i.i.i.i, 1 ; idx is even (shl by 1), so or 1 == idx + 1
+  %17 = load ptr addrspace(1), ptr addrspace(4) %12, align 8
+  %arrayidx.i25.i.i.i.i = getelementptr inbounds i32, ptr addrspace(1) %17, i64 %add14.i.i.i.i
+  %18 = load i32, ptr addrspace(1) %arrayidx.i25.i.i.i.i, align 4
+  %conv18.i.i.i.i = add i32 %18, %9 ; buf[idx+1] += truncated workgroup size
+  store i32 %conv18.i.i.i.i, ptr addrspace(1) %arrayidx.i25.i.i.i.i, align 4
+  %add.i.i = add i64 %storemerge.i.i, %4
+  br label %for.cond.i.i
+
+lexit21: ; preds = %for.cond.i.i
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 2, i32 noundef 2, i32 noundef 272)
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br i1 %cmpz16.i, label %wg_leader11.i, label %wg_cf12.i
+
+wg_leader11.i: ; preds = %lexit21
+  %19 = load i32, ptr addrspace(3) @GCnt5, align 4
+  %inc.i = add nsw i32 %19, 1 ; leader advances the shared outer-loop counter
+  store i32 %inc.i, ptr addrspace(3) @GCnt5, align 4
+  br label %wg_cf12.i
+
+wg_cf12.i: ; preds = %wg_leader11.i, %lexit21
+  call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272)
+  br label %for.cond.i
+
+lexit20: ; preds = %wg_cf6.i
+  call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %agg.tmp67) ; closes the lifetime.start in entry
+  ret void
+}