@@ -650,6 +650,10 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
650650 // is alloca in the callee. Save the total private memory to the metadata.
651651 unsigned int totalPrivateMemPerWI = m_ModAllocaInfo->getTotalPrivateMemPerWI (m_currFunction);
652652
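653+ // 32 is the max SIMD width. 32-bit offsets are safe when pointers are narrower than 8 bytes, or when totalPrivateMemPerWI * 32 * max addressed HW threads still fits in 32 bits.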
654+ bool safe32bitOffset = m_currFunction->getParent ()->getDataLayout ().getPointerSize () < 8
655+ || (totalPrivateMemPerWI * 32ull * Ctx.platform .getMaxAddressedHWThreads ()) <= (uint64_t )UINT32_MAX;
656+
653657 // This change is only till the FuncMD is ported to new MD framework
654658 ModuleMetaData* const modMD = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData ();
655659 IGC_ASSERT (nullptr != modMD);
@@ -708,6 +712,7 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
708712 LLVMContext& C = m_currFunction->getContext ();
709713
710714 IntegerType* typeInt32 = Type::getInt32Ty (C);
715+ IntegerType* typeInt64 = Type::getInt64Ty (C);
711716 // Creates intrinsics that will be lowered in the CodeGen and will handle the simd lane id
712717 Function* simdLaneIdFunc = GenISAIntrinsic::getDeclaration (m_currFunction->getParent (), GenISAIntrinsic::GenISA_simdLaneId);
713718 // Creates intrinsics that will be lowered in the CodeGen and will handle the simd size
@@ -861,10 +866,14 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
861866
862867 ConstantInt* totalPrivateMemPerWIValue = ConstantInt::get (typeInt32, totalPrivateMemPerWI);
863868 Value* totalPrivateMemPerThread = entryBuilder.CreateMul (simdSize, totalPrivateMemPerWIValue, VALUE_NAME (" totalPrivateMemPerThread" ));
869+ if (!safe32bitOffset)
870+ totalPrivateMemPerThread = entryBuilder.CreateZExt (totalPrivateMemPerThread, typeInt64);
864871
865872 Function* pHWTIDFunc = GenISAIntrinsic::getDeclaration (m_currFunction->getParent (), GenISAIntrinsic::GenISA_hw_thread_id_alloca, Type::getInt32Ty (C));
866- llvm::Value* threadId = entryBuilder.CreateCall (pHWTIDFunc);
867- llvm::Value* perThreadOffset = entryBuilder.CreateMul (threadId, totalPrivateMemPerThread, VALUE_NAME (" perThreadOffset" ));
873+ Value* threadId = entryBuilder.CreateCall (pHWTIDFunc);
874+ if (!safe32bitOffset)
875+ threadId = entryBuilder.CreateZExt (threadId, typeInt64);
876+ Value* perThreadOffset = entryBuilder.CreateMul (threadId, totalPrivateMemPerThread, VALUE_NAME (" perThreadOffset" ));
868877 perThreadOffset = entryBuilder.CreateZExt (perThreadOffset, privateBase->getType ());
869878 privateBase = entryBuilder.CreateAdd (privateBase, perThreadOffset);
870879 }
@@ -963,17 +972,21 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
963972 // %simdLaneId = zext i16 simdLaneId16 to i32
964973 // %simdSize = call i32 @llvm.gen.simdSize()
965974 // %totalPrivateMemPerThread = mul i32 %simdSize, <totalPrivateMemPerWI>
975+ // %totalPrivateMemPerThread = zext i32 %totalPrivateMemPerThread to i64 (emitted only when !safe32bitOffset)
966976
967977 // %r0.5 = extractelement <8 x i32> %r0, i32 5
968978 // %threadId = and i32 %r0.5, 0x1FF|0x3FF (Thread ID is in the lower 9 bits or 10 bit(KBL & CNL+) of r0.5)
969- // %perThreadOffset = mul i32 %threadId, %totalPrivateMemPerThread
979+ // %threadId = zext i32 %threadId to i64 (emitted only when !safe32bitOffset)
980+ // %perThreadOffset = mul i64 %threadId, %totalPrivateMemPerThread
970981
971982 ConstantInt* totalPrivateMemPerWIValue = ConstantInt::get (typeInt32, totalPrivateMemPerWI);
972983
973984 Instruction* simdLaneId16 = entryBuilder.CreateCall (simdLaneIdFunc, llvm::None, VALUE_NAME (" simdLaneId16" ));
974985 Value* simdLaneId = entryBuilder.CreateIntCast (simdLaneId16, typeInt32, false , VALUE_NAME (" simdLaneId" ));
975986 Instruction* simdSize = entryBuilder.CreateCall (simdSizeFunc, llvm::None, VALUE_NAME (" simdSize" ));
976987 Value* totalPrivateMemPerThread = entryBuilder.CreateMul (simdSize, totalPrivateMemPerWIValue, VALUE_NAME (" totalPrivateMemPerThread" ));
988+ if (!safe32bitOffset)
989+ totalPrivateMemPerThread = entryBuilder.CreateZExt (totalPrivateMemPerThread, typeInt64);
977990
978991 Function* pHWTIDFunc = GenISAIntrinsic::getDeclaration (m_currFunction->getParent (), GenISAIntrinsic::GenISA_hw_thread_id_alloca, Type::getInt32Ty (C));
979992 Value* threadId = entryBuilder.CreateCall (pHWTIDFunc);
@@ -990,6 +1003,8 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
9901003
9911004 threadId = entryBuilder.CreateOr (FFSID, shlThreadID, VALUE_NAME (" threadId" ));
9921005 }
1006+ if (!safe32bitOffset)
1007+ threadId = entryBuilder.CreateZExt (threadId, typeInt64);
9931008
9941009 Value* perThreadOffset = entryBuilder.CreateMul (threadId, totalPrivateMemPerThread, VALUE_NAME (" perThreadOffset" ));
9951010 auto perThreadOffsetInst = dyn_cast_or_null<Instruction>(perThreadOffset);
@@ -1011,10 +1026,12 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
10111026 for (auto pAI : allocaInsts)
10121027 {
10131028 // %bufferOffset = mul i32 %simdSize, <scalarBufferOffset>
1014- // %bufferOffsetForThread = add i32 %perThreadOffset, %bufferOffset
1029+ // %bufferOffset = zext i32 %bufferOffset to i64 (emitted only when !safe32bitOffset)
1030+ // %bufferOffsetForThread = add i64 %perThreadOffset, %bufferOffset
10151031 // %perLaneOffset = mul i32 %simdLaneId, <bufferSize>
1016- // %totalOffset = add i32 %bufferOffsetForThread, %perLaneOffset
1017- // %privateBufferGEP = getelementptr i8* %privateBase, i32 %totalOffset
1032+ // %perLaneOffset = zext i32 %perLaneOffset to i64 (emitted only when !safe32bitOffset)
1033+ // %totalOffset = add i64 %bufferOffsetForThread, %perLaneOffset
1034+ // %privateBufferGEP = getelementptr i8* %privateBase, i64 %totalOffset
10181035 // %privateBuffer = bitcast i8* %offsettmp1 to <buffer type>
10191036
10201037 IGCLLVM::IRBuilder<> builder (pAI);
@@ -1025,9 +1042,13 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
10251042 unsigned int bufferSize = m_ModAllocaInfo->getConstBufferSize (pAI);
10261043
10271044 Value* bufferOffset = builder.CreateMul (simdSize, ConstantInt::get (typeInt32, scalarBufferOffset), VALUE_NAME (pAI->getName () + " .SIMDBufferOffset" ));
1045+ if (!safe32bitOffset)
1046+ bufferOffset = builder.CreateZExt (bufferOffset, typeInt64);
10281047 Value* bufferOffsetForThread = builder.CreateAdd (perThreadOffset, bufferOffset, VALUE_NAME (pAI->getName () + " .bufferOffsetForThread" ));
10291048 Value* perLaneOffset = isUniform ? builder.getInt32 (0 ) : simdLaneId;
10301049 perLaneOffset = builder.CreateMul (perLaneOffset, ConstantInt::get (typeInt32, bufferSize), VALUE_NAME (" perLaneOffset" ));
1050+ if (!safe32bitOffset)
1051+ perLaneOffset = builder.CreateZExt (perLaneOffset, typeInt64);
10311052 Value* totalOffset = builder.CreateAdd (bufferOffsetForThread, perLaneOffset, VALUE_NAME (pAI->getName () + " .totalOffset" ));
10321053 Value* privateBufferGEP = builder.CreateGEP (privateMemPtr, totalOffset, VALUE_NAME (pAI->getName () + " .privateBufferGEP" ));
10331054 Value* privateBuffer = builder.CreatePointerCast (privateBufferGEP, pAI->getType (), VALUE_NAME (pAI->getName () + " .privateBuffer" ));
0 commit comments