Skip to content

Commit cdaeaee

Browse files
mkhoshzaigcbot
authored andcommitted
Adding an experimental feature for load coalescing
Adding an experimental feature for load coalescing
1 parent b6c58e9 commit cdaeaee

File tree

3 files changed

+78
-3
lines changed

3 files changed

+78
-3
lines changed

IGC/Compiler/CISACodeGen/MemOpt.cpp

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2716,6 +2716,8 @@ namespace {
27162716
return splitVectorType(V, LdStKind::IS_LOAD);
27172717
}
27182718

2719+
void AllowDummyLoadCoalescing(InstAndOffsetPairs Loads);
2720+
27192721
// GatherCopy:
27202722
// copy multiple values (arg: Vals) into a single Dst (return value)
27212723
// (It's a packed copy, thus size(all Vals) = size(Dst).
@@ -2822,7 +2824,7 @@ bool IGC::doLdStCombine(const CodeGenContext* CGC) {
28222824
uint32_t keyval = IGC_GET_FLAG_VALUE(EnableLdStCombine);
28232825
if ((keyval & 0x3) == 1 && !CGC->platform.LSCEnabled())
28242826
return false;
2825-
return ((keyval & 0x3) != 0);
2827+
return ((keyval & 0x3) || (keyval & 0x4));
28262828
}
28272829

28282830
uint32_t IGC::getMaxStoreBytes(const CodeGenContext* CGC) {
@@ -3351,9 +3353,13 @@ void LdStCombine::combineLoads()
33513353
if ((IGC_GET_FLAG_VALUE(EnableLdStCombine) & 0x4) == 0)
33523354
return;
33533355

3354-
// Start with OCL, then apply to other APIs.
33553356
if (m_CGC->type != ShaderType::OPENCL_SHADER)
3356-
return;
3357+
{
3358+
if (!m_CGC->getModuleMetaData()->compOpt.EnableLdStCombineforLoad)
3359+
{
3360+
return;
3361+
}
3362+
}
33573363

33583364
// All load candidates with addr = common-base + const-offset
33593365
InstAndOffsetPairs Loads;
@@ -3439,6 +3445,14 @@ void LdStCombine::combineLoads()
34393445
}
34403446
}
34413447

3448+
// Experiment: if it's the last element of the load and does not fit the DWORD alignment,
3449+
// it creates a dummy load with the same alignment type as the previous load.
3450+
if (m_CGC->type != ShaderType::OPENCL_SHADER)
3451+
{
3452+
if (m_CGC->getModuleMetaData()->compOpt.EnableLdStCombinewithDummyLoad)
3453+
AllowDummyLoadCoalescing(Loads);
3454+
}
3455+
34423456
// Note: For now, each load is considered once. For example,
34433457
// load a
34443458
// store x : alias to load c
@@ -3802,6 +3816,64 @@ void LdStCombine::createBundles(BasicBlock* BB, InstAndOffsetPairs& LoadStores)
38023816
markVisited(LoadStores);
38033817
}
38043818

3819+
// Experimental: if the last load candidate ends off a DWORD (4-byte) boundary
// and one more access of the same size would bring the end onto a DWORD
// boundary, emit a "dummy" load right after it in the address computation
// (index + 1) and append it to the candidate list.
//
// Only the following address pattern is recognised: the last load's pointer
// is a GEP with exactly three operands (base, idx0, idx1) whose base is a
// null pointer constant and whose last index is an i32, e.g.
//
//   %164 = add i32 %114, 1020
//   %165 = and i32 %164, 1020
//   %166 = getelementptr [1024 x half], [1024 x half] addrspace(3)* null, i32 0, i32 %165
//   %167 = load half, half addrspace(3)* %166, align 8
//   %168 = or i32 %165, 1
//   %169 = getelementptr [1024 x half], [1024 x half] addrspace(3)* null, i32 0, i32 %168
//   %170 = load half, half addrspace(3)* %169, align 2
//   %171 = or i32 %165, 2
//   %172 = getelementptr [1024 x half], [1024 x half] addrspace(3)* null, i32 0, i32 %171
//   %173 = load half, half addrspace(3)* %172, align 4
//
// For such a group, three new instructions are inserted immediately before
// the last load (%173):
//
//   %a = add i32 %171, 1
//   %g = getelementptr inbounds [1024 x half], [1024 x half] addrspace(3)* null, i32 0, i32 %a
//   %d = load half, half addrspace(3)* %g
//
// Loads: load candidates paired with their byte offsets; the last entry is
//        the one inspected.
//
// NOTE(review): Loads is received by value, so the entry appended below is
// added to this function's local copy only and is not seen by the caller.
// Taking the list by reference looks like the intent, but that requires
// changing the in-class declaration together with this definition.
//
// NOTE(review): bumping the last GEP index by one presumably assumes that the
// indexed element size equals the load size -- TODO confirm.
void LdStCombine::AllowDummyLoadCoalescing(InstAndOffsetPairs Loads)
{
    // Nothing to pad when there are no candidates (also avoids indexing
    // an empty list).
    if (Loads.size() == 0)
    {
        return;
    }

    // Copy the entry: the list may grow below.
    const LdStInfo LastLoad = Loads[Loads.size() - 1];

    // Only plain load instructions are handled; anything else is left alone.
    LoadInst* lead = dyn_cast<LoadInst>(LastLoad.Inst);
    if (lead == nullptr)
    {
        return;
    }

    const uint32_t LastLoadSize = (uint32_t)m_DL->getTypeStoreSize(lead->getType());
    const uint32_t currLoadSize = LastLoadSize + LastLoad.ByteOffset;
    if ((currLoadSize % 4) == 0)
    {
        // The group already ends on a DWORD boundary.
        return;
    }

    // Replicate the last load: the dummy has the same size, so it only helps
    // if exactly one more such access reaches a DWORD boundary.
    const uint32_t newLoadSize = LastLoadSize;
    if (((currLoadSize + newLoadSize) % 4) != 0)
    {
        return;
    }

    auto gep = dyn_cast<GetElementPtrInst>(lead->getPointerOperand());
    if (gep == nullptr)
    {
        return;
    }
    if ((gep->getNumOperands() != 3) || !isa<ConstantPointerNull>(gep->getPointerOperand()))
    {
        return;
    }

    // The increment below is an i32 constant; an index of any other type
    // cannot be added to it.
    Value* lastIdx = gep->getOperand(2);
    if (!lastIdx->getType()->isIntegerTy(32))
    {
        return;
    }

    // New instructions are inserted right before the last load.
    IRBuilder<> irBuilder(LastLoad.Inst);
    Value* AddInst = irBuilder.CreateAdd(lastIdx, irBuilder.getInt32(1));
    Value* gepArg[] = { gep->getOperand(1), AddInst };
    Value* Addr = irBuilder.CreateInBoundsGEP(gep->getSourceElementType(),
        gep->getOperand(0), gepArg);
    Instruction* dummyLoad =
        irBuilder.CreateLoad(IGCLLVM::getNonOpaquePtrEltTy(Addr->getType()), Addr);

    // The dummy load sits directly after the last load in the byte layout.
    Loads.push_back(LdStInfo(dummyLoad, LastLoad.ByteOffset + newLoadSize));
}
3876+
38053877
// A member of layout struct can be a vector type. This function will decide
38063878
// if the vector type or a sequence of its elements' types shall be used as
38073879
// the layout struct's member types. If spliting a vector type into a sequence

IGC/common/MDFrameWork.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,8 @@ namespace IGC
446446
bool DisableConstantCoalescing = false;
447447
bool EnableUndefAlphaOutputAsRed = true;
448448
bool WaEnableALTModeVisaWA = false;
449+
bool EnableLdStCombineforLoad = false;
450+
bool EnableLdStCombinewithDummyLoad = false;
449451
bool NewSpillCostFunction = false;
450452
bool ForceLargeGRFNum4RQ = false;
451453
bool DisableEUFusion = false;

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,7 @@ DECLARE_IGC_REGKEY(DWORD, MemOptGEPCanon, 2, "[test] GEP canon
481481
DECLARE_IGC_REGKEY(bool, DisableMemOpt2, false, "Disable MemOpt2", false)
482482
DECLARE_IGC_REGKEY(bool, EnableExplicitCopyForByVal, true, "Enable generating an explicit copy (alloca + memcpy) in a caller for aggregate argumentes with byval attribute", true)
483483
DECLARE_IGC_REGKEY(DWORD, EnableLdStCombine, 1, "Enable load/store combine pass if set to 1 (lsc message only) or 2; bit 3 = 1 [tmp for testing] : enabled load combine (intend to replace memopt)", true)
484+
DECLARE_IGC_REGKEY(bool, EnableLdStCombinewithDummyLoad, false, "Adds extra load instruction to increase the size of coalesced load", true)
484485
DECLARE_IGC_REGKEY(DWORD, MaxStoreVectorSizeInBytes, 0, "[LdStCombine] the max non-uniform vector size for the coalesced store. 0: compiler choice (default, 16(4DW)); others: 4/8/16/32", true)
485486
DECLARE_IGC_REGKEY(DWORD, MaxLoadVectorSizeInBytes, 0, "[LdStCombine] the max non-uniform vector size for the coalesced load. 0: compiler choice (default, 16(4DW)); others: 4/8/16/32", true)
486487
DECLARE_IGC_REGKEY(bool, DisableMergeStore, false, "[temp]If EnableLdStCombine is on, disable mergestore (memopt) if this is set. Temp key for testing", true)

0 commit comments

Comments
 (0)