Skip to content

Commit 41c6859

Browse files
AMDGPU/UniformityAnalysis: fix G_ZEXTLOAD and G_SEXTLOAD (llvm#157845)
Use the same rules for G_ZEXTLOAD and G_SEXTLOAD as for G_LOAD. Flat addrspace(0) and private addrspace(5) G_ZEXTLOAD and G_SEXTLOAD should always be divergent.
1 parent 265b032 commit 41c6859

File tree

2 files changed

+20
-15
lines changed

2 files changed

+20
-15
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10281,7 +10281,7 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
1028110281
InstructionUniformity
1028210282
SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
1028310283
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
10284-
unsigned opcode = MI.getOpcode();
10284+
unsigned Opcode = MI.getOpcode();
1028510285

1028610286
auto HandleAddrSpaceCast = [this, &MRI](const MachineInstr &MI) {
1028710287
Register Dst = MI.getOperand(0).getReg();
@@ -10301,7 +10301,7 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
1030110301
// If the target supports globally addressable scratch, the mapping from
1030210302
// scratch memory to the flat aperture changes therefore an address space cast
1030310303
// is no longer uniform.
10304-
if (opcode == TargetOpcode::G_ADDRSPACE_CAST)
10304+
if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
1030510305
return HandleAddrSpaceCast(MI);
1030610306

1030710307
if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
@@ -10329,7 +10329,8 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
1032910329
//
1033010330
// All other loads are not divergent, because if threads issue loads with the
1033110331
// same arguments, they will always get the same result.
10332-
if (opcode == AMDGPU::G_LOAD) {
10332+
if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10333+
Opcode == AMDGPU::G_SEXTLOAD) {
1033310334
if (MI.memoperands_empty())
1033410335
return InstructionUniformity::NeverUniform; // conservative assumption
1033510336

@@ -10343,10 +10344,10 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
1034310344
return InstructionUniformity::Default;
1034410345
}
1034510346

10346-
if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
10347-
opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10348-
opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10349-
AMDGPU::isGenericAtomic(opcode)) {
10347+
if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10348+
Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10349+
Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10350+
AMDGPU::isGenericAtomic(Opcode)) {
1035010351
return InstructionUniformity::NeverUniform;
1035110352
}
1035210353
return InstructionUniformity::Default;

llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,21 +46,22 @@ body: |
4646
%6:_(p5) = G_IMPLICIT_DEF
4747
4848
; Atomic load
49-
; CHECK-NOT: DIVERGENT
50-
49+
; CHECK: DIVERGENT
50+
; CHECK-SAME: G_ZEXTLOAD
5151
%0:_(s32) = G_ZEXTLOAD %1(p0) :: (load seq_cst (s16) from `ptr undef`)
5252
5353
; flat load
54-
; CHECK-NOT: DIVERGENT
55-
54+
; CHECK: DIVERGENT
55+
; CHECK-SAME: G_ZEXTLOAD
5656
%2:_(s32) = G_ZEXTLOAD %1(p0) :: (load (s16) from `ptr undef`)
5757
5858
; Global load
5959
; CHECK-NOT: DIVERGENT
6060
%3:_(s32) = G_ZEXTLOAD %4(p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1)
6161
6262
; Private load
63-
; CHECK-NOT: DIVERGENT
63+
; CHECK: DIVERGENT
64+
; CHECK-SAME: G_ZEXTLOAD
6465
%5:_(s32) = G_ZEXTLOAD %6(p5) :: (volatile load (s16) from `ptr addrspace(5) undef`, addrspace 5)
6566
G_STORE %2(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
6667
G_STORE %3(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
@@ -80,19 +81,22 @@ body: |
8081
%6:_(p5) = G_IMPLICIT_DEF
8182
8283
; Atomic load
83-
; CHECK-NOT: DIVERGENT
84+
; CHECK: DIVERGENT
85+
; CHECK-SAME: G_SEXTLOAD
8486
%0:_(s32) = G_SEXTLOAD %1(p0) :: (load seq_cst (s16) from `ptr undef`)
8587
8688
; flat load
87-
; CHECK-NOT: DIVERGENT
89+
; CHECK: DIVERGENT
90+
; CHECK-SAME: G_SEXTLOAD
8891
%2:_(s32) = G_SEXTLOAD %1(p0) :: (load (s16) from `ptr undef`)
8992
9093
; Global load
9194
; CHECK-NOT: DIVERGENT
9295
%3:_(s32) = G_SEXTLOAD %4(p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1)
9396
9497
; Private load
95-
; CHECK-NOT: DIVERGENT
98+
; CHECK: DIVERGENT
99+
; CHECK-SAME: G_SEXTLOAD
96100
%5:_(s32) = G_SEXTLOAD %6(p5) :: (volatile load (s16) from `ptr addrspace(5) undef`, addrspace 5)
97101
G_STORE %2(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
98102
G_STORE %3(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)

0 commit comments

Comments
 (0)