AMDGPU/UniformityAnalysis: fix G_ZEXTLOAD and G_SEXTLOAD #157845
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking.
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-amdgpu

Author: Petar Avramovic (petar-avramovic)

Changes: Use the same rules for G_ZEXTLOAD and G_SEXTLOAD as for G_LOAD.

Full diff: https://github.com/llvm/llvm-project/pull/157845.diff

2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5c958dfe6954f..398c99b3bd127 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10281,7 +10281,7 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
InstructionUniformity
SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
- unsigned opcode = MI.getOpcode();
+ unsigned Opcode = MI.getOpcode();
auto HandleAddrSpaceCast = [this, &MRI](const MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
@@ -10301,7 +10301,7 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
// If the target supports globally addressable scratch, the mapping from
// scratch memory to the flat aperture changes therefore an address space cast
// is no longer uniform.
- if (opcode == TargetOpcode::G_ADDRSPACE_CAST)
+ if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
return HandleAddrSpaceCast(MI);
if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
@@ -10329,7 +10329,8 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
//
// All other loads are not divergent, because if threads issue loads with the
// same arguments, they will always get the same result.
- if (opcode == AMDGPU::G_LOAD) {
+ if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
+ Opcode == AMDGPU::G_SEXTLOAD) {
if (MI.memoperands_empty())
return InstructionUniformity::NeverUniform; // conservative assumption
@@ -10343,10 +10344,10 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
return InstructionUniformity::Default;
}
- if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
- opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
- opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
- AMDGPU::isGenericAtomic(opcode)) {
+ if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
+ Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
+ Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
+ AMDGPU::isGenericAtomic(Opcode)) {
return InstructionUniformity::NeverUniform;
}
return InstructionUniformity::Default;
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
index cb3c2de5b8753..d799cd2057f47 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
@@ -46,13 +46,13 @@ body: |
%6:_(p5) = G_IMPLICIT_DEF
; Atomic load
- ; CHECK-NOT: DIVERGENT
-
+ ; CHECK: DIVERGENT
+ ; CHECK-SAME: G_ZEXTLOAD
%0:_(s32) = G_ZEXTLOAD %1(p0) :: (load seq_cst (s16) from `ptr undef`)
; flat load
- ; CHECK-NOT: DIVERGENT
-
+ ; CHECK: DIVERGENT
+ ; CHECK-SAME: G_ZEXTLOAD
%2:_(s32) = G_ZEXTLOAD %1(p0) :: (load (s16) from `ptr undef`)
; Gloabal load
@@ -60,7 +60,8 @@ body: |
%3:_(s32) = G_ZEXTLOAD %4(p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1)
; Private load
- ; CHECK-NOT: DIVERGENT
+ ; CHECK: DIVERGENT
+ ; CHECK-SAME: G_ZEXTLOAD
%5:_(s32) = G_ZEXTLOAD %6(p5) :: (volatile load (s16) from `ptr addrspace(5) undef`, addrspace 5)
G_STORE %2(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
G_STORE %3(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
@@ -80,11 +81,13 @@ body: |
%6:_(p5) = G_IMPLICIT_DEF
; Atomic load
- ; CHECK-NOT: DIVERGENT
+ ; CHECK: DIVERGENT
+ ; CHECK-SAME: G_SEXTLOAD
%0:_(s32) = G_SEXTLOAD %1(p0) :: (load seq_cst (s16) from `ptr undef`)
; flat load
- ; CHECK-NOT: DIVERGENT
+ ; CHECK: DIVERGENT
+ ; CHECK-SAME: G_SEXTLOAD
%2:_(s32) = G_SEXTLOAD %1(p0) :: (load (s16) from `ptr undef`)
; Gloabal load
@@ -92,7 +95,8 @@ body: |
%3:_(s32) = G_SEXTLOAD %4(p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1)
; Private load
- ; CHECK-NOT: DIVERGENT
+ ; CHECK: DIVERGENT
+ ; CHECK-SAME: G_SEXTLOAD
%5:_(s32) = G_SEXTLOAD %6(p5) :: (volatile load (s16) from `ptr addrspace(5) undef`, addrspace 5)
G_STORE %2(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
G_STORE %3(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
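For readers outside the backend, the rule this hunk implements can be modeled with a small, self-contained sketch. This is not the LLVM code (the real check lives in SIInstrInfo::getGenericInstructionUniformity and inspects the MachineMemOperands of the instruction); the enum values and the helper name below are illustrative stand-ins only.

```cpp
#include <cstdio>
#include <initializer_list>

// Simplified stand-ins for the real opcode / address-space / result enums.
enum class Opc { G_LOAD, G_ZEXTLOAD, G_SEXTLOAD, G_STORE };
enum class AddrSpace { Flat = 0, Global = 1, Private = 5 };
enum class Uniformity { Default, NeverUniform };

// After the patch, zero- and sign-extending loads are classified exactly like
// G_LOAD: a load whose memory operand is in the flat (0) or private (5)
// address space is never uniform; everything else keeps the default rule.
Uniformity classifyGenericLoad(Opc Opcode,
                               std::initializer_list<AddrSpace> MemOperands) {
  if (Opcode != Opc::G_LOAD && Opcode != Opc::G_ZEXTLOAD &&
      Opcode != Opc::G_SEXTLOAD)
    return Uniformity::Default;
  if (MemOperands.size() == 0)
    return Uniformity::NeverUniform; // conservative, as in the real code
  for (AddrSpace AS : MemOperands)
    if (AS == AddrSpace::Flat || AS == AddrSpace::Private)
      return Uniformity::NeverUniform;
  return Uniformity::Default;
}

int main() {
  auto Name = [](Uniformity U) {
    return U == Uniformity::NeverUniform ? "NeverUniform" : "Default";
  };
  // Mirrors the cases exercised in loads-gmir.mir: flat and private extending
  // loads are reported divergent, global ones are not.
  std::printf("flat    G_ZEXTLOAD -> %s\n",
              Name(classifyGenericLoad(Opc::G_ZEXTLOAD, {AddrSpace::Flat})));
  std::printf("global  G_SEXTLOAD -> %s\n",
              Name(classifyGenericLoad(Opc::G_SEXTLOAD, {AddrSpace::Global})));
  std::printf("private G_SEXTLOAD -> %s\n",
              Name(classifyGenericLoad(Opc::G_SEXTLOAD, {AddrSpace::Private})));
  return 0;
}
```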
Force-pushed bfa8de6 to bb121e2.
Force-pushed 8573b17 to f426257.
Force-pushed f426257 to 310a7b6.
Merge activity
Use the same rules for G_ZEXTLOAD and G_SEXTLOAD as for G_LOAD. Flat addrspace(0) and private addrspace(5) G_ZEXTLOAD and G_SEXTLOAD should always be divergent.
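The reason flat and private loads get this treatment, restating the comment in SIInstrInfo.cpp, is that threads can execute the same load with the same address and still get different results, because scratch memory is per lane. The toy model below is purely illustrative (the buffer names, sizes, and two-lane setup are made up for the example) and is not how the backend models memory; it only demonstrates why such a load can never be assumed uniform.

```cpp
#include <array>
#include <cstdio>

// Toy model: two lanes of a wavefront, one shared global buffer and one
// private (scratch) buffer per lane.
constexpr int NumLanes = 2;
std::array<int, 4> GlobalMem = {10, 11, 12, 13};
std::array<std::array<int, 4>, NumLanes> PrivateMem = {{{1, 2, 3, 4},
                                                        {5, 6, 7, 8}}};

int main() {
  const int Addr = 2; // every lane loads from the same address
  // Global load: same address, same backing store -> the same value in every
  // lane, so the result can be uniform.
  for (int Lane = 0; Lane < NumLanes; ++Lane)
    std::printf("lane %d global  load: %d\n", Lane, GlobalMem[Addr]);
  // Private load: same address, but each lane reads its own scratch memory,
  // so identical inputs can produce different results -> never uniform.
  for (int Lane = 0; Lane < NumLanes; ++Lane)
    std::printf("lane %d private load: %d\n", Lane, PrivateMem[Lane][Addr]);
  return 0;
}
```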