Skip to content

Commit 97f7cb8

Browse files
ViacheslavRb, pszymich
authored and committed
Use 64bit type for perThreadOffset calculation
Use 64bit type for private memory thread offsets calculation in PrivateMemoryResolution pass.
1 parent beb7544 commit 97f7cb8

File tree

4 files changed

+114
-6
lines changed

4 files changed

+114
-6
lines changed

IGC/Compiler/CISACodeGen/Platform.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1394,5 +1394,10 @@ bool limitedBCR() const
13941394
GFX_IS_DG2_G12_CONFIG(m_platformInfo.usDeviceID));
13951395
}
13961396

1397+
// Returns the fixed value 4096, used as the number of addressable HW threads.
// PrivateMemoryResolution multiplies this by the max SIMD width (32) and the
// per-work-item private memory size to decide whether per-thread private
// memory offsets still fit in 32 bits or must be computed in 64 bits.
// NOTE(review): the value is the same for every platform here — confirm that
// 4096 is a safe upper bound for all supported devices.
uint32_t getMaxAddressedHWThreads() const
1398+
{
1399+
return 4096;
1400+
}
1401+
13971402
};
13981403
}//namespace IGC

IGC/Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryResolution.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,10 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
650650
// is alloca in the callee. Save the total private memory to the metadata.
651651
unsigned int totalPrivateMemPerWI = m_ModAllocaInfo->getTotalPrivateMemPerWI(m_currFunction);
652652

653+
// 32 is max simd width
654+
bool safe32bitOffset = m_currFunction->getParent()->getDataLayout().getPointerSize() < 8
655+
|| (totalPrivateMemPerWI * 32ull * Ctx.platform.getMaxAddressedHWThreads()) <= (uint64_t)UINT32_MAX;
656+
653657
// This change is only till the FuncMD is ported to new MD framework
654658
ModuleMetaData* const modMD = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData();
655659
IGC_ASSERT(nullptr != modMD);
@@ -708,6 +712,7 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
708712
LLVMContext& C = m_currFunction->getContext();
709713

710714
IntegerType* typeInt32 = Type::getInt32Ty(C);
715+
IntegerType* typeInt64 = Type::getInt64Ty(C);
711716
// Creates intrinsics that will be lowered in the CodeGen and will handle the simd lane id
712717
Function* simdLaneIdFunc = GenISAIntrinsic::getDeclaration(m_currFunction->getParent(), GenISAIntrinsic::GenISA_simdLaneId);
713718
// Creates intrinsics that will be lowered in the CodeGen and will handle the simd size
@@ -861,10 +866,14 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
861866

862867
ConstantInt* totalPrivateMemPerWIValue = ConstantInt::get(typeInt32, totalPrivateMemPerWI);
863868
Value* totalPrivateMemPerThread = entryBuilder.CreateMul(simdSize, totalPrivateMemPerWIValue, VALUE_NAME("totalPrivateMemPerThread"));
869+
if (!safe32bitOffset)
870+
totalPrivateMemPerThread = entryBuilder.CreateZExt(totalPrivateMemPerThread, typeInt64);
864871

865872
Function* pHWTIDFunc = GenISAIntrinsic::getDeclaration(m_currFunction->getParent(), GenISAIntrinsic::GenISA_hw_thread_id_alloca, Type::getInt32Ty(C));
866-
llvm::Value* threadId = entryBuilder.CreateCall(pHWTIDFunc);
867-
llvm::Value* perThreadOffset = entryBuilder.CreateMul(threadId, totalPrivateMemPerThread, VALUE_NAME("perThreadOffset"));
873+
Value* threadId = entryBuilder.CreateCall(pHWTIDFunc);
874+
if (!safe32bitOffset)
875+
threadId = entryBuilder.CreateZExt(threadId, typeInt64);
876+
Value* perThreadOffset = entryBuilder.CreateMul(threadId, totalPrivateMemPerThread, VALUE_NAME("perThreadOffset"));
868877
perThreadOffset = entryBuilder.CreateZExt(perThreadOffset, privateBase->getType());
869878
privateBase = entryBuilder.CreateAdd(privateBase, perThreadOffset);
870879
}
@@ -963,17 +972,21 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
963972
// %simdLaneId = zext i16 simdLaneId16 to i32
964973
// %simdSize = call i32 @llvm.gen.simdSize()
965974
// %totalPrivateMemPerThread = mul i32 %simdSize, <totalPrivateMemPerWI>
975+
// %totalPrivateMemPerThread = zext i32 %totalPrivateMemPerThread to i64
966976

967977
// %r0.5 = extractelement <8 x i32> %r0, i32 5
968978
// %threadId = and i32 %r0.5, 0x1FF|0x3FF (Thread ID is in the lower 9 bits or 10 bit(KBL & CNL+) of r0.5)
969-
// %perThreadOffset = mul i32 %threadId, %totalPrivateMemPerThread
979+
// %threadId = zext i32 %threadId to i64
980+
// %perThreadOffset = mul i64 %threadId, %totalPrivateMemPerThread
970981

971982
ConstantInt* totalPrivateMemPerWIValue = ConstantInt::get(typeInt32, totalPrivateMemPerWI);
972983

973984
Instruction* simdLaneId16 = entryBuilder.CreateCall(simdLaneIdFunc, llvm::None, VALUE_NAME("simdLaneId16"));
974985
Value* simdLaneId = entryBuilder.CreateIntCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"));
975986
Instruction* simdSize = entryBuilder.CreateCall(simdSizeFunc, llvm::None, VALUE_NAME("simdSize"));
976987
Value* totalPrivateMemPerThread = entryBuilder.CreateMul(simdSize, totalPrivateMemPerWIValue, VALUE_NAME("totalPrivateMemPerThread"));
988+
if (!safe32bitOffset)
989+
totalPrivateMemPerThread = entryBuilder.CreateZExt(totalPrivateMemPerThread, typeInt64);
977990

978991
Function* pHWTIDFunc = GenISAIntrinsic::getDeclaration(m_currFunction->getParent(), GenISAIntrinsic::GenISA_hw_thread_id_alloca, Type::getInt32Ty(C));
979992
Value* threadId = entryBuilder.CreateCall(pHWTIDFunc);
@@ -990,6 +1003,8 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
9901003

9911004
threadId = entryBuilder.CreateOr(FFSID, shlThreadID, VALUE_NAME("threadId"));
9921005
}
1006+
if (!safe32bitOffset)
1007+
threadId = entryBuilder.CreateZExt(threadId, typeInt64);
9931008

9941009
Value* perThreadOffset = entryBuilder.CreateMul(threadId, totalPrivateMemPerThread, VALUE_NAME("perThreadOffset"));
9951010
auto perThreadOffsetInst = dyn_cast_or_null<Instruction>(perThreadOffset);
@@ -1011,10 +1026,12 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
10111026
for (auto pAI : allocaInsts)
10121027
{
10131028
// %bufferOffset = mul i32 %simdSize, <scalarBufferOffset>
1014-
// %bufferOffsetForThread = add i32 %perThreadOffset, %bufferOffset
1029+
// %bufferOffset = zext i32 %bufferOffset to i64
1030+
// %bufferOffsetForThread = add i64 %perThreadOffset, %bufferOffset
10151031
// %perLaneOffset = mul i32 %simdLaneId, <bufferSize>
1016-
// %totalOffset = add i32 %bufferOffsetForThread, %perLaneOffset
1017-
// %privateBufferGEP = getelementptr i8* %privateBase, i32 %totalOffset
1032+
// %perLaneOffset = zext i32 %perLaneOffset to i64
1033+
// %totalOffset = add i64 %bufferOffsetForThread, %perLaneOffset
1034+
// %privateBufferGEP = getelementptr i8* %privateBase, i64 %totalOffset
10181035
// %privateBuffer = bitcast i8* %offsettmp1 to <buffer type>
10191036

10201037
IGCLLVM::IRBuilder<> builder(pAI);
@@ -1025,9 +1042,13 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
10251042
unsigned int bufferSize = m_ModAllocaInfo->getConstBufferSize(pAI);
10261043

10271044
Value* bufferOffset = builder.CreateMul(simdSize, ConstantInt::get(typeInt32, scalarBufferOffset), VALUE_NAME(pAI->getName() + ".SIMDBufferOffset"));
1045+
if (!safe32bitOffset)
1046+
bufferOffset = builder.CreateZExt(bufferOffset, typeInt64);
10281047
Value* bufferOffsetForThread = builder.CreateAdd(perThreadOffset, bufferOffset, VALUE_NAME(pAI->getName() + ".bufferOffsetForThread"));
10291048
Value* perLaneOffset = isUniform ? builder.getInt32(0) : simdLaneId;
10301049
perLaneOffset = builder.CreateMul(perLaneOffset, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
1050+
if (!safe32bitOffset)
1051+
perLaneOffset = builder.CreateZExt(perLaneOffset, typeInt64);
10311052
Value* totalOffset = builder.CreateAdd(bufferOffsetForThread, perLaneOffset, VALUE_NAME(pAI->getName() + ".totalOffset"));
10321053
Value* privateBufferGEP = builder.CreateGEP(privateMemPtr, totalOffset, VALUE_NAME(pAI->getName() + ".privateBufferGEP"));
10331054
Value* privateBuffer = builder.CreatePointerCast(privateBufferGEP, pAI->getType(), VALUE_NAME(pAI->getName() + ".privateBuffer"));
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2022 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; RUN: igc_opt --igc-private-mem-resolution --platformpvc -S < %s 2>&1 | FileCheck %s
10+
11+
; Large-alloca case: [50000 x float] needs 200000 bytes per work item, and
; 200000 * 32 (max SIMD width) * 4096 (max addressed HW threads) does not fit
; in 32 bits, so the expectations below require the per-thread, per-buffer and
; per-lane offsets to be zero-extended and combined as i64.
; NOTE(review): this relies on the module using 8-byte pointers; no datalayout
; is visible in this test — confirm the default is what is intended.
define spir_kernel void @testallocabig(i8* %privateBase) {
12+
entry:
13+
%0 = alloca [50000 x float], align 4
14+
ret void
15+
; CHECK-LABEL: entry:
16+
; CHECK: [[simdLaneId16:%[A-z0-9]*]] = call i16 @llvm.genx.GenISA.simdLaneId()
17+
; CHECK: [[simdLaneId:%[A-z0-9]*]] = zext i16 [[simdLaneId16]] to i32
18+
; CHECK: [[simdSize:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.simdSize()
19+
; CHECK: [[totalPrivateMemPerThread:%[A-z0-9]*]] = mul i32 [[simdSize]], 200000
20+
; CHECK: [[ZXT0:%[A-z0-9]*]] = zext i32 [[totalPrivateMemPerThread]] to i64
21+
; CHECK: [[CAL0:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.hw.thread.id.alloca.i32()
22+
; CHECK: [[ZXT1:%[A-z0-9]*]] = zext i32 [[CAL0]] to i64
23+
; CHECK: [[perThreadOffset:%[A-z0-9]*]] = mul i64 [[ZXT1]], [[ZXT0]]
24+
; CHECK: [[SIMDBufferOffset:%[.A-z0-9]*]] = mul i32 [[simdSize]], 0
25+
; CHECK: [[ZXT2:%[A-z0-9]*]] = zext i32 [[SIMDBufferOffset]] to i64
26+
; CHECK: [[bufferOffsetForThread:%[.A-z0-9]*]] = add i64 [[perThreadOffset]], [[ZXT2]]
27+
; CHECK: [[perLaneOffset:%[A-z0-9]*]] = mul i32 [[simdLaneId]], 200000
28+
; CHECK: [[ZXT3:%[A-z0-9]*]] = zext i32 [[perLaneOffset]] to i64
29+
; CHECK: [[totalOffset:%[.A-z0-9]*]] = add i64 [[bufferOffsetForThread]], [[ZXT3]]
30+
; CHECK: [[privateBufferGEP:%[.A-z0-9]*]] = getelementptr i8, i8* %privateBase, i64 [[totalOffset]]
31+
; CHECK: [[privateBuffer:%[.A-z0-9]*]] = bitcast i8* [[privateBufferGEP]] to [50000 x float]*
32+
; CHECK: ret void
33+
}
34+
35+
!IGCMetadata = !{!0}
36+
!igc.functions = !{!1}
37+
38+
!0 = !{!"ModuleMD"}
39+
!1 = !{void (i8*)* @testallocabig, !2}
40+
!2 = !{!3, !4}
41+
!3 = !{!"function_type", i32 0}
42+
!4 = !{!"implicit_arg_desc", !5}
43+
!5 = !{i32 12}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2022 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
;
9+
; RUN: igc_opt --igc-private-mem-resolution --platformpvc -S < %s 2>&1 | FileCheck %s
10+
11+
; Small-alloca case: [100 x float] needs 400 bytes per work item, and
; 400 * 32 (max SIMD width) * 4096 (max addressed HW threads) still fits in
; 32 bits, so the expectations below require all offset arithmetic to stay in
; i32 with no zero-extension to i64.
define spir_kernel void @testallocasmall(i8* %privateBase) {
12+
entry:
13+
%0 = alloca [100 x float], align 4
14+
ret void
15+
; CHECK-LABEL: entry:
16+
; CHECK: [[simdLaneId16:%[A-z0-9]*]] = call i16 @llvm.genx.GenISA.simdLaneId()
17+
; CHECK: [[simdLaneId:%[A-z0-9]*]] = zext i16 [[simdLaneId16]] to i32
18+
; CHECK: [[simdSize:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.simdSize()
19+
; CHECK: [[totalPrivateMemPerThread:%[.A-z0-9]*]] = mul i32 [[simdSize]], 400
20+
; CHECK: [[CALL:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.hw.thread.id.alloca.i32()
21+
; CHECK: [[perThreadOffset:%[A-z0-9]*]] = mul i32 [[CALL]], [[totalPrivateMemPerThread]]
22+
; CHECK: [[SIMDBufferOffset:%[.A-z0-9]*]] = mul i32 [[simdSize]], 0
23+
; CHECK: [[bufferOffsetForThread:%[.A-z0-9]*]] = add i32 [[perThreadOffset]], [[SIMDBufferOffset]]
24+
; CHECK: [[perLaneOffset:%[A-z0-9]*]] = mul i32 [[simdLaneId]], 400
25+
; CHECK: [[totalOffset:%[.A-z0-9]*]] = add i32 [[bufferOffsetForThread]], [[perLaneOffset]]
26+
; CHECK: [[privateBufferGEP:%[.A-z0-9]*]] = getelementptr i8, i8* %privateBase, i32 [[totalOffset]]
27+
; CHECK: [[privateBuffer:%[.A-z0-9]*]] = bitcast i8* [[privateBufferGEP]] to [100 x float]*
28+
; CHECK: ret void
29+
}
30+
31+
!IGCMetadata = !{!0}
32+
!igc.functions = !{!1}
33+
34+
!0 = !{!"ModuleMD"}
35+
!1 = !{void (i8*)* @testallocasmall, !2}
36+
!2 = !{!3, !4}
37+
!3 = !{!"function_type", i32 0}
38+
!4 = !{!"implicit_arg_desc", !5}
39+
!5 = !{i32 12}

0 commit comments

Comments
 (0)