Skip to content

Commit 820aee8

Browse files
krystian-andrzejewski authored and igcbot committed
Adding a reg key to disable coalescing memory fences selectively
This change extends the control available over the SynchronizationObjectCoalescing pass. It makes it possible to specify which fences must not be processed by the pass, based on the memory they synchronize.
1 parent 1d7388f commit 820aee8

File tree

2 files changed

+97
-0
lines changed

2 files changed

+97
-0
lines changed

IGC/Compiler/Optimizer/SynchronizationObjectCoalescing.cpp

Lines changed: 96 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,18 @@ enum InstructionMask : uint32_t
6969
LastMaskPlusOne,
7070
};
7171

72+
////////////////////////////////////////////////////////////////////////////////
73+
enum class SyncInstMask : uint32_t
74+
{
75+
None = 0x0,
76+
UntypedMemoryFence = (1 << 0),
77+
TypedMemoryFence = (1 << 1),
78+
SharedMemoryFence = (1 << 2),
79+
UrbMemoryFence = (1 << 3),
80+
ThreadGroupBarrier = (1 << 4)
81+
};
82+
83+
7284
constexpr InstructionMask AllInstructionsMask =
7385
InstructionMask{ ((InstructionMask::LastMaskPlusOne - 1) << 1) - 1 };
7486

@@ -84,6 +96,15 @@ inline constexpr InstructionMask& operator|=(InstructionMask& a, InstructionMask
8496
a = a | b;
8597
return a;
8698
}
99+
inline constexpr SyncInstMask operator|(SyncInstMask a, SyncInstMask b)
100+
{
101+
return SyncInstMask(uint32_t(a) | uint32_t(b));
102+
}
103+
inline constexpr SyncInstMask& operator|=(SyncInstMask& a, SyncInstMask b)
104+
{
105+
a = a | b;
106+
return a;
107+
}
87108

88109
inline constexpr InstructionMask operator&(InstructionMask a, InstructionMask b)
89110
{
@@ -442,9 +463,18 @@ class SynchronizationObjectCoalescing : public llvm::FunctionPass
442463
////////////////////////////////////////////////////////////////////////
443464
bool IsUntypedMemoryLscFenceOperationForGlobalAccess(const llvm::Instruction* pInst) const;
444465

466+
////////////////////////////////////////////////////////////////////////
467+
bool IsSharedLscFenceOperation(const llvm::Instruction* pInst) const;
468+
469+
////////////////////////////////////////////////////////////////////////
470+
bool IsUrbLscFenceOperation(const llvm::Instruction* pInst) const;
471+
445472
////////////////////////////////////////////////////////////////////////
446473
static bool IsTypedMemoryFenceOperation(const llvm::Instruction* pInst);
447474

475+
////////////////////////////////////////////////////////////////////////
476+
bool IsTypedMemoryLscFenceOperation(const llvm::Instruction* pInst) const;
477+
448478
////////////////////////////////////////////////////////////////////////
449479
bool IsTypedMemoryFenceOperationWithInvalidationFunctionality(const llvm::Instruction* pInst) const;
450480

@@ -663,6 +693,39 @@ bool SynchronizationObjectCoalescing::FindRedundancies()
663693

664694
for (llvm::Instruction* pInst : synchronizationOperations)
665695
{
696+
SyncInstMask instType = SyncInstMask::None;
697+
if (IsUntypedMemoryFenceOperationForGlobalAccess(pInst) ||
698+
IsUntypedMemoryLscFenceOperationForGlobalAccess(pInst))
699+
{
700+
instType |= SyncInstMask::UntypedMemoryFence;
701+
}
702+
if (IsTypedMemoryFenceOperation(pInst) ||
703+
IsTypedMemoryLscFenceOperation(pInst))
704+
{
705+
instType |= SyncInstMask::TypedMemoryFence;
706+
}
707+
if (IsUntypedMemoryFenceOperationForSharedMemoryAccess(pInst) ||
708+
IsSharedLscFenceOperation(pInst))
709+
{
710+
instType |= SyncInstMask::SharedMemoryFence;
711+
}
712+
if (IsUrbFenceOperation(pInst) ||
713+
IsUrbLscFenceOperation(pInst))
714+
{
715+
instType |= SyncInstMask::UrbMemoryFence;
716+
}
717+
if (isBarrierIntrinsic(pInst))
718+
{
719+
instType |= SyncInstMask::ThreadGroupBarrier;
720+
}
721+
IGC_ASSERT(instType != SyncInstMask::None);
722+
bool isDisabled = instType != SyncInstMask::None &&
723+
(IGC_GET_FLAG_VALUE(DisableCoalescingSynchronizationObjectMask) & static_cast<DWORD>(instType)) == static_cast<DWORD>(instType);
724+
if (isDisabled)
725+
{
726+
continue;
727+
}
728+
666729
if ((m_GlobalMemoryInstructionMask & GetDefaultWriteMemoryInstructionMask(pInst)) == 0)
667730
{
668731
#if _DEBUG
@@ -2615,6 +2678,28 @@ bool SynchronizationObjectCoalescing::IsUntypedMemoryLscFenceOperationForGlobalA
26152678
return false;
26162679
}
26172680

2681+
////////////////////////////////////////////////////////////////////////
2682+
bool SynchronizationObjectCoalescing::IsSharedLscFenceOperation(const llvm::Instruction* pInst) const
2683+
{
2684+
if (IsLscFenceOperation(pInst))
2685+
{
2686+
LSC_SFID mem = GetLscMem(pInst);
2687+
return mem == LSC_SFID::LSC_SLM;
2688+
}
2689+
return false;
2690+
}
2691+
2692+
////////////////////////////////////////////////////////////////////////
2693+
bool SynchronizationObjectCoalescing::IsTypedMemoryLscFenceOperation(const llvm::Instruction* pInst) const
2694+
{
2695+
if (IsLscFenceOperation(pInst))
2696+
{
2697+
LSC_SFID mem = GetLscMem(pInst);
2698+
return mem == LSC_SFID::LSC_TGM;
2699+
}
2700+
return false;
2701+
}
2702+
26182703
////////////////////////////////////////////////////////////////////////
26192704
bool SynchronizationObjectCoalescing::IsTypedMemoryFenceOperation(const llvm::Instruction* pInst)
26202705
{
@@ -2642,6 +2727,17 @@ bool SynchronizationObjectCoalescing::IsTypedMemoryFenceOperationWithInvalidatio
26422727
llvm::cast<llvm::ConstantInt>(pInst->getOperand(L1CacheInvalidateArg))->getValue().getBoolValue();
26432728
}
26442729

2730+
////////////////////////////////////////////////////////////////////////
2731+
bool SynchronizationObjectCoalescing::IsUrbLscFenceOperation(const llvm::Instruction* pInst) const
2732+
{
2733+
if (IsLscFenceOperation(pInst))
2734+
{
2735+
LSC_SFID mem = GetLscMem(pInst);
2736+
return mem == LSC_SFID::LSC_URB;
2737+
}
2738+
return false;
2739+
}
2740+
26452741
////////////////////////////////////////////////////////////////////////
26462742
bool SynchronizationObjectCoalescing::IsUrbFenceOperation(const llvm::Instruction* pInst)
26472743
{

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -210,6 +210,7 @@ DECLARE_IGC_REGKEY(bool, DisableBranchSwaping, false, "Setting this to
210210
DECLARE_IGC_REGKEY(bool, DisableSynchronizationObjectCoalescingPass, false, "Disable SynchronizationObjectCoalescing pass", false)
211211
DECLARE_IGC_REGKEY(bool, EnableIndependentSharedMemoryFenceFunctionality, false, "Enable treating global memory fences as shared memory fences in SynchronizationObjectCoalescing pass", false)
212212
DECLARE_IGC_REGKEY(DWORD, SynchronizationObjectCoalescingConfig, 0, "Modify the default behavior of SynchronizationObjectCoalescing value is a bitmask bit0 – remove fences in read barrier write scenario", true)
213+
DECLARE_IGC_REGKEY(DWORD, DisableCoalescingSynchronizationObjectMask, 0, "The mask is casted to IGC::SyncInstMask and informs which synchronization objects should not be coalesced. Note that synchronization objects classified in multiple types are not disabled if any bit describing them is off.", true)
213214
DECLARE_IGC_REGKEY(DWORD,SetLoopUnrollThreshold, 0, "Set the loop unroll threshold. Value 0 will use the default threshold.", false)
214215
DECLARE_IGC_REGKEY(DWORD,SetLoopUnrollThresholdForHighRegPressure, 0, "Set the loop unroll threshold for shaders with high reg pressure. Value 0 will use the default threshold.", false)
215216
DECLARE_IGC_REGKEY(DWORD,SetRegisterPressureThresholdForLoopUnroll, 96, "Set the register pressure threshold for limiting the loop unroll to smaller loops", false)

0 commit comments

Comments
 (0)