Skip to content

Commit c15b311

Browse files
committed
[AMDGPU] Insert s_wait_xcnt(0) before atomics to work around write-combining miss hazard
This patch adds a workaround for a hazard on GFX1250 by inserting an `s_wait_xcnt(0)` instruction before any atomic operation that might write to memory. Fixes SWDEV-543703.
1 parent d4847f7 commit c15b311

File tree

46 files changed

+1682
-1
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1682
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -895,6 +895,12 @@ def FeatureCvtFP8VOP1Bug : SubtargetFeature<"cvt-fp8-vop1-bug",
895895
[FeatureFP8ConversionInsts]
896896
>;
897897

898+
// Subtarget feature marking hardware affected by the write-combining misses
// hazard. The workaround inserts s_wait_xcnt(0) (not s_wait_cnt) before
// atomic operations, so the description names that instruction.
def FeatureWriteCombiningMissesHazards : SubtargetFeature<"write-combining-misses-hazards",
  "HasWriteCombiningMissesHazards",
  "true",
  "Write combining misses hazards that require s_wait_xcnt(0) before every atomic operation"
>;
903+
898904
def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
899905
"HasPkFmacF16Inst",
900906
"true",
@@ -2145,6 +2151,7 @@ def FeatureISAVersion12_50 : FeatureSet<
21452151
FeatureXNACK,
21462152
FeatureClusters,
21472153
FeatureD16Writes32BitVgpr,
2154+
FeatureWriteCombiningMissesHazards,
21482155
]>;
21492156

21502157
def FeatureISAVersion12_51 : FeatureSet<
@@ -2945,6 +2952,8 @@ def HasGWS : Predicate<"Subtarget->hasGWS()">;
29452952
def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
29462953
def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
29472954

2955+
def HasWriteCombiningMissesHazards : Predicate<"Subtarget->hasWriteCombiningMissesHazards()">;
2956+
29482957
def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">;
29492958

29502959
def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">;

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1269,6 +1269,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
12691269
fixScratchBaseForwardingHazard(MI);
12701270
if (ST.setRegModeNeedsVNOPs())
12711271
fixSetRegMode(MI);
1272+
if (ST.hasWriteCombiningMissesHazards())
1273+
fixWriteCombiningMissesHazards(MI);
12721274
}
12731275

12741276
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -2177,6 +2179,29 @@ bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) {
21772179
return true;
21782180
}
21792181

2182+
/// This function inserts an s_wait_cnt(0) before every atomic store/RMW
2183+
/// operation to work around the write combining hazard.
2184+
bool GCNHazardRecognizer::fixWriteCombiningMissesHazards(MachineInstr *MI) {
2185+
if (!SIInstrInfo::isAtomic(*MI) || !MI->mayStore())
2186+
return false;
2187+
2188+
// If the previous instruction is an s_wait_xcnt, and the count is 0, we don't
2189+
// need to do anything.
2190+
MachineBasicBlock &MBB = *MI->getParent();
2191+
auto Itr = MachineBasicBlock::iterator(MI);
2192+
auto PrevItr = std::prev(Itr);
2193+
if (Itr != MBB.begin() && (PrevItr->getOpcode() == AMDGPU::S_WAIT_XCNT_soft ||
2194+
PrevItr->getOpcode() == AMDGPU::S_WAIT_XCNT)) {
2195+
int64_t Cnt = PrevItr->getOperand(0).getImm();
2196+
if (Cnt == 0)
2197+
return false;
2198+
}
2199+
2200+
BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_WAIT_XCNT_soft))
2201+
.addImm(0);
2202+
return true;
2203+
}
2204+
21802205
bool GCNHazardRecognizer::fixShift64HighRegBug(MachineInstr *MI) {
21812206
if (!ST.hasShift64HighRegBug())
21822207
return false;

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
114114
bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
115115
bool fixScratchBaseForwardingHazard(MachineInstr *MI);
116116
bool fixSetRegMode(MachineInstr *MI);
117+
bool fixWriteCombiningMissesHazards(MachineInstr *MI);
117118

118119
int checkMAIHazards(MachineInstr *MI);
119120
int checkMAIHazards908(MachineInstr *MI);

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
282282
bool HasPointSampleAccel = false;
283283
bool HasLdsBarrierArriveAtomic = false;
284284
bool HasSetPrioIncWgInst = false;
285-
285+
bool HasWriteCombiningMissesHazards = false;
286286
bool RequiresCOV6 = false;
287287
bool UseBlockVGPROpsForCSR = false;
288288
bool HasGloballyAddressableScratch = false;
@@ -1836,6 +1836,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
18361836
return getGeneration() == GFX12;
18371837
}
18381838

1839+
// Requires s_wait_xcnt(0) before atomic operations that may write to memory.
bool hasWriteCombiningMissesHazards() const {
1840+
return HasWriteCombiningMissesHazards;
1841+
}
1842+
18391843
// Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
18401844
// read.
18411845
bool hasScratchBaseForwardingHazard() const {

0 commit comments

Comments
 (0)