Skip to content

Commit 23d7eca

Browse files
committed
[AMDGPU] Rematerialize VGPR candidates when SGPR spills to VGPR over the VGPR limit
Before, when selecting candidates to rematerialize, we would only consider SGPR candidates when there was an excess of SGPR registers. Failing to eliminate the excess would result in spills to VGPRs. This is normally not an issue, unless spilling to VGPRs results in excess VGPRs. This patch does 2 things: * It relaxes the GCNRPTarget success criteria: now we accept regions where we spill SGPRs to VGPRs, as long as this does not end up in excess VGPRs. * It changes isSaveBeneficial to consider the excess VGPRs (which includes the SGPRs that would be spilled to VGPR). With these changes, the compiler rematerializes VGPRs when the excess SGPRs would result in VGPR excess. This has some unadressed flaws: we should attempt to rematerialize SGPRs first in order to eliminate the SGPR excess that results in VGPR excess. Solves SWDEV-549940
1 parent e331b29 commit 23d7eca

File tree

6 files changed

+52
-179
lines changed

6 files changed

+52
-179
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -104,14 +104,15 @@ struct RegExcess {
104104
unsigned AGPR = 0;
105105

106106
bool anyExcess() const { return SGPR || VGPR || ArchVGPR || AGPR; }
107+
bool spillsToMemory() const { return VGPR || ArchVGPR || AGPR; }
107108

108109
RegExcess(const MachineFunction &MF, const GCNRegPressure &RP,
109110
unsigned MaxSGPRs, unsigned MaxVGPRs) {
110111
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
111112
SGPR = std::max(static_cast<int>(RP.getSGPRNum() - MaxSGPRs), 0);
112113

113114
// The number of virtual VGPRs required to handle excess SGPR
114-
auto WaveSize = ST.getWavefrontSize();
115+
unsigned WaveSize = ST.getWavefrontSize();
115116
unsigned VGPRForSGPRSpills = divideCeil(SGPR, WaveSize);
116117

117118
unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
@@ -415,15 +416,15 @@ bool GCNRPTarget::isSaveBeneficial(Register Reg) const {
415416
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
416417
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
417418

419+
RegExcess Excess(MF, RP, MaxSGPRs, MaxVGPRs);
420+
418421
if (SRI->isSGPRClass(RC))
419-
return RP.getSGPRNum() > MaxSGPRs;
420-
unsigned NumVGPRs =
421-
SRI->isAGPRClass(RC) ? RP.getAGPRNum() : RP.getArchVGPRNum();
422-
// The addressable limit must always be respected.
423-
if (NumVGPRs > MaxVGPRs)
424-
return true;
425-
// For unified RFs, combined VGPR usage limit must be respected as well.
426-
return UnifiedRF && RP.getVGPRNum(true) > MaxUnifiedVGPRs;
422+
return Excess.SGPR;
423+
424+
if (SRI->isAGPRClass(RC))
425+
return Excess.AGPR;
426+
427+
return Excess.VGPR || Excess.ArchVGPR;
427428
}
428429

429430
bool GCNRPTarget::satisfied() const {
@@ -434,6 +435,11 @@ bool GCNRPTarget::satisfied() const {
434435
return true;
435436
}
436437

438+
bool GCNRPTarget::spillsToMemory() const {
439+
RegExcess Excess(MF, RP, MaxSGPRs, MaxVGPRs);
440+
return Excess.spillsToMemory();
441+
}
442+
437443
///////////////////////////////////////////////////////////////////////////////
438444
// GCNRPTracker
439445

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ class GCNRPTarget {
234234

235235
/// Whether the current RP is at or below the defined pressure target.
236236
bool satisfied() const;
237+
bool spillsToMemory() const;
237238

238239
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
239240
friend raw_ostream &operator<<(raw_ostream &OS, const GCNRPTarget &Target) {

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2046,14 +2046,16 @@ bool PreRARematStage::setObjective() {
20462046
// Set up "spilling targets" for all regions.
20472047
unsigned MaxSGPRs = ST.getMaxNumSGPRs(F);
20482048
unsigned MaxVGPRs = ST.getMaxNumVGPRs(F);
2049+
bool SpillsToMemory = false;
20492050
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
20502051
const GCNRegPressure &RP = DAG.Pressure[I];
20512052
GCNRPTarget &Target = RPTargets.emplace_back(MaxSGPRs, MaxVGPRs, MF, RP);
20522053
if (!Target.satisfied())
20532054
TargetRegions.set(I);
2055+
SpillsToMemory |= Target.spillsToMemory();
20542056
}
20552057

2056-
if (TargetRegions.any() || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
2058+
if (SpillsToMemory || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
20572059
// In addition to register usage being above addressable limits, occupancy
20582060
// below the minimum is considered like "spilling" as well.
20592061
TargetOcc = std::nullopt;

llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -822,31 +822,31 @@ body: |
822822
; GFX908-NEXT: [[V_CVT_I32_F64_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
823823
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
824824
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
825-
; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
826-
; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
827-
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
828-
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
829-
; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
830825
; GFX908-NEXT: {{ $}}
831826
; GFX908-NEXT: bb.1:
832827
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
833828
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
834829
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
835830
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
836831
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
837-
; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
838-
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]]
832+
; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
833+
; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
834+
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
835+
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]]
839836
; GFX908-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
837+
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
838+
; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
840839
; GFX908-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
841840
; GFX908-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
842-
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]]
841+
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_30]], implicit [[V_CVT_I32_F64_e32_31]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]]
843842
; GFX908-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
844843
; GFX908-NEXT: S_NOP 0, implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]]
845844
; GFX908-NEXT: S_NOP 0, implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
846845
; GFX908-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]]
847846
; GFX908-NEXT: S_NOP 0, implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
848847
; GFX908-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]]
849-
; GFX908-NEXT: S_NOP 0, implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[V_CVT_I32_F64_e32_31]]
848+
; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
849+
; GFX908-NEXT: S_NOP 0, implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[V_CVT_I32_F64_e32_32]]
850850
; GFX908-NEXT: S_NOP 0, implicit [[DEF28]]
851851
; GFX908-NEXT: S_ENDPGM 0
852852
;
@@ -910,32 +910,32 @@ body: |
910910
; GFX90A-NEXT: [[DEF26:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
911911
; GFX90A-NEXT: [[DEF27:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
912912
; GFX90A-NEXT: [[DEF28:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
913-
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
914-
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
915-
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
916-
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
917-
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
913+
; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
914+
; GFX90A-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
915+
; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
916+
; GFX90A-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
917+
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode
918918
; GFX90A-NEXT: {{ $}}
919919
; GFX90A-NEXT: bb.1:
920920
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]], implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]], implicit [[V_CVT_I32_F64_e32_4]]
921921
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_5]], implicit [[V_CVT_I32_F64_e32_6]], implicit [[V_CVT_I32_F64_e32_7]], implicit [[V_CVT_I32_F64_e32_8]], implicit [[V_CVT_I32_F64_e32_9]]
922922
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_10]], implicit [[V_CVT_I32_F64_e32_11]], implicit [[V_CVT_I32_F64_e32_12]], implicit [[V_CVT_I32_F64_e32_13]], implicit [[V_CVT_I32_F64_e32_14]]
923923
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_15]], implicit [[V_CVT_I32_F64_e32_16]], implicit [[V_CVT_I32_F64_e32_17]], implicit [[V_CVT_I32_F64_e32_18]], implicit [[V_CVT_I32_F64_e32_19]]
924924
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]], implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]], implicit [[V_CVT_I32_F64_e32_24]]
925-
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
926-
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[V_CVT_I32_F64_e32_27]], implicit [[V_CVT_I32_F64_e32_28]]
927-
; GFX90A-NEXT: [[DEF29:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
928-
; GFX90A-NEXT: [[DEF30:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
929-
; GFX90A-NEXT: [[DEF31:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
930-
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]]
931-
; GFX90A-NEXT: [[DEF32:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
932-
; GFX90A-NEXT: S_NOP 0, implicit [[DEF32]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]]
933-
; GFX90A-NEXT: S_NOP 0, implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
934-
; GFX90A-NEXT: S_NOP 0, implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]]
935-
; GFX90A-NEXT: S_NOP 0, implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]]
936-
; GFX90A-NEXT: S_NOP 0, implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]]
937-
; GFX90A-NEXT: S_NOP 0, implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[V_CVT_I32_F64_e32_31]]
938-
; GFX90A-NEXT: S_NOP 0, implicit [[DEF28]]
925+
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
926+
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
927+
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
928+
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_25]], implicit [[V_CVT_I32_F64_e32_26]], implicit [[V_CVT_I32_F64_e32_28]], implicit [[V_CVT_I32_F64_e32_29]], implicit [[V_CVT_I32_F64_e32_30]]
929+
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
930+
; GFX90A-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
931+
; GFX90A-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]], implicit [[V_CVT_I32_F64_e32_32]], implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]]
932+
; GFX90A-NEXT: S_NOP 0, implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]]
933+
; GFX90A-NEXT: S_NOP 0, implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]]
934+
; GFX90A-NEXT: S_NOP 0, implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]]
935+
; GFX90A-NEXT: S_NOP 0, implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]]
936+
; GFX90A-NEXT: S_NOP 0, implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]]
937+
; GFX90A-NEXT: S_NOP 0, implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[V_CVT_I32_F64_e32_27]]
938+
; GFX90A-NEXT: S_NOP 0, implicit [[DEF32]]
939939
; GFX90A-NEXT: S_ENDPGM 0
940940
bb.0:
941941
successors: %bb.1
@@ -1926,8 +1926,6 @@ body: |
19261926
; GFX908: bb.0:
19271927
; GFX908-NEXT: successors: %bb.1(0x80000000)
19281928
; GFX908-NEXT: {{ $}}
1929-
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
1930-
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
19311929
; GFX908-NEXT: [[DEF:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
19321930
; GFX908-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
19331931
; GFX908-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF
@@ -2212,6 +2210,8 @@ body: |
22122210
; GFX908-NEXT: S_NOP 0, implicit [[DEF209]], implicit [[DEF210]], implicit [[DEF211]], implicit [[DEF212]], implicit [[DEF213]], implicit [[DEF214]], implicit [[DEF215]], implicit [[DEF216]], implicit [[DEF217]], implicit [[DEF218]]
22132211
; GFX908-NEXT: S_NOP 0, implicit [[DEF219]], implicit [[DEF220]], implicit [[DEF221]], implicit [[DEF222]], implicit [[DEF223]], implicit [[DEF224]], implicit [[DEF225]], implicit [[DEF226]], implicit [[DEF227]], implicit [[DEF228]]
22142212
; GFX908-NEXT: S_NOP 0, implicit [[DEF229]], implicit [[DEF230]], implicit [[DEF231]], implicit [[DEF232]], implicit [[DEF233]], implicit [[DEF234]], implicit [[DEF235]], implicit [[DEF236]], implicit [[DEF237]], implicit [[DEF238]]
2213+
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 257, implicit $exec, implicit $mode
2214+
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 258, implicit $exec, implicit $mode
22152215
; GFX908-NEXT: S_NOP 0, implicit [[DEF239]], implicit [[DEF240]], implicit [[DEF241]], implicit [[DEF242]], implicit [[DEF243]], implicit [[DEF244]], implicit [[DEF245]], implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
22162216
; GFX908-NEXT: S_ENDPGM 0
22172217
;

0 commit comments

Comments
 (0)