Skip to content

Commit 991f9b6

Browse files
committed
Marking dead scc
1 parent 6579973 commit 991f9b6

File tree

3 files changed

+43
-64
lines changed

3 files changed

+43
-64
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -5203,9 +5203,7 @@ static uint32_t getIdentityValueFor32BitWaveReduction(unsigned Opc) {
52035203
case AMDGPU::S_MAX_I32:
52045204
return std::numeric_limits<int32_t>::min();
52055205
case AMDGPU::S_ADD_I32:
5206-
case AMDGPU::S_ADD_U64_PSEUDO:
52075206
case AMDGPU::S_SUB_I32:
5208-
case AMDGPU::S_SUB_U64_PSEUDO:
52095207
case AMDGPU::S_OR_B32:
52105208
case AMDGPU::S_XOR_B32:
52115209
return std::numeric_limits<uint32_t>::min();
@@ -5227,6 +5225,9 @@ static uint64_t getIdentityValueFor64BitWaveReduction(unsigned Opc) {
52275225
return std::numeric_limits<uint64_t>::min();
52285226
case AMDGPU::V_CMP_GT_I64_e64: // max.i64
52295227
return std::numeric_limits<int64_t>::min();
5228+
case AMDGPU::S_ADD_U64_PSEUDO:
5229+
case AMDGPU::S_SUB_U64_PSEUDO:
5230+
return std::numeric_limits<uint64_t>::min();
52305231
default:
52315232
llvm_unreachable(
52325233
"Unexpected opcode in getIdentityValueFor64BitWaveReduction");
@@ -5355,7 +5356,8 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
53555356
if (Opc == AMDGPU::S_SUB_U64_PSEUDO) {
53565357
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_SUB_I32), NegatedValLo)
53575358
.addImm(0)
5358-
.addReg(NewAccumulator->getOperand(0).getReg());
5359+
.addReg(NewAccumulator->getOperand(0).getReg())
5360+
.setOperandDead(3); // Dead scc
53595361
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_ASHR_I32), NegatedValHi)
53605362
.addReg(NegatedValLo)
53615363
.addImm(31)
@@ -5550,9 +5552,9 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
55505552
}
55515553
case AMDGPU::S_ADD_U64_PSEUDO:
55525554
case AMDGPU::S_SUB_U64_PSEUDO: {
5553-
unsigned newOpc1 = Opc == AMDGPU::S_ADD_U64_PSEUDO ? AMDGPU::S_ADD_U32
5555+
unsigned NewOpc1 = Opc == AMDGPU::S_ADD_U64_PSEUDO ? AMDGPU::S_ADD_U32
55545556
: AMDGPU::S_SUB_U32;
5555-
unsigned newOpc2 = Opc == AMDGPU::S_ADD_U64_PSEUDO ? AMDGPU::S_ADDC_U32
5557+
unsigned NewOpc2 = Opc == AMDGPU::S_ADD_U64_PSEUDO ? AMDGPU::S_ADDC_U32
55565558
: AMDGPU::S_SUBB_U32;
55575559
Register DestLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
55585560
Register DestHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
@@ -5562,12 +5564,13 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
55625564
MachineOperand Accumhi = TII->buildExtractSubRegOrImm(
55635565
MI, MRI, Accumulator->getOperand(0), DstRegClass, AMDGPU::sub1,
55645566
&AMDGPU::SReg_32RegClass);
5565-
BuildMI(*ComputeLoop, I, DL, TII->get(newOpc1), DestLo)
5567+
BuildMI(*ComputeLoop, I, DL, TII->get(NewOpc1), DestLo)
55665568
.add(Accumlo)
55675569
.addReg(LaneValueLo->getOperand(0).getReg());
5568-
BuildMI(*ComputeLoop, I, DL, TII->get(newOpc2), DestHi)
5570+
BuildMI(*ComputeLoop, I, DL, TII->get(NewOpc2), DestHi)
55695571
.add(Accumhi)
5570-
.addReg(LaneValueHi->getOperand(0).getReg());
5572+
.addReg(LaneValueHi->getOperand(0).getReg())
5573+
.setOperandDead(3); // Dead scc
55715574
NewAccumulator = BuildMI(*ComputeLoop, I, DL,
55725575
TII->get(TargetOpcode::REG_SEQUENCE), DstReg)
55735576
.addReg(DestLo)

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll

Lines changed: 16 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1833,9 +1833,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
18331833
; GFX8DAGISEL-LABEL: divergent_value_i64:
18341834
; GFX8DAGISEL: ; %bb.0: ; %entry
18351835
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1836-
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
1836+
; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], 0
18371837
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
1838-
; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4
18391838
; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
18401839
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
18411840
; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v2, s8
@@ -1855,9 +1854,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
18551854
; GFX8GISEL-LABEL: divergent_value_i64:
18561855
; GFX8GISEL: ; %bb.0: ; %entry
18571856
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1858-
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
1857+
; GFX8GISEL-NEXT: s_mov_b64 s[4:5], 0
18591858
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
1860-
; GFX8GISEL-NEXT: s_mov_b32 s5, s4
18611859
; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
18621860
; GFX8GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
18631861
; GFX8GISEL-NEXT: v_readlane_b32 s9, v2, s8
@@ -1877,9 +1875,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
18771875
; GFX9DAGISEL-LABEL: divergent_value_i64:
18781876
; GFX9DAGISEL: ; %bb.0: ; %entry
18791877
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1880-
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
1878+
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], 0
18811879
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
1882-
; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4
18831880
; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
18841881
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
18851882
; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v2, s8
@@ -1899,9 +1896,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
18991896
; GFX9GISEL-LABEL: divergent_value_i64:
19001897
; GFX9GISEL: ; %bb.0: ; %entry
19011898
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1902-
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
1899+
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], 0
19031900
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
1904-
; GFX9GISEL-NEXT: s_mov_b32 s5, s4
19051901
; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
19061902
; GFX9GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
19071903
; GFX9GISEL-NEXT: v_readlane_b32 s9, v2, s8
@@ -1921,9 +1917,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
19211917
; GFX1064DAGISEL-LABEL: divergent_value_i64:
19221918
; GFX1064DAGISEL: ; %bb.0: ; %entry
19231919
; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1924-
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
1920+
; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], 0
19251921
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
1926-
; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4
19271922
; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
19281923
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
19291924
; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v2, s8
@@ -1942,9 +1937,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
19421937
; GFX1064GISEL-LABEL: divergent_value_i64:
19431938
; GFX1064GISEL: ; %bb.0: ; %entry
19441939
; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1945-
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
1940+
; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], 0
19461941
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
1947-
; GFX1064GISEL-NEXT: s_mov_b32 s5, s4
19481942
; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
19491943
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
19501944
; GFX1064GISEL-NEXT: v_readlane_b32 s9, v2, s8
@@ -1963,9 +1957,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
19631957
; GFX1032DAGISEL-LABEL: divergent_value_i64:
19641958
; GFX1032DAGISEL: ; %bb.0: ; %entry
19651959
; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1966-
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
1960+
; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], 0
19671961
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
1968-
; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4
19691962
; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
19701963
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
19711964
; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
@@ -1984,9 +1977,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
19841977
; GFX1032GISEL-LABEL: divergent_value_i64:
19851978
; GFX1032GISEL: ; %bb.0: ; %entry
19861979
; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1987-
; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
1980+
; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], 0
19881981
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
1989-
; GFX1032GISEL-NEXT: s_mov_b32 s5, s4
19901982
; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
19911983
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
19921984
; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7
@@ -2005,12 +1997,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
20051997
; GFX1164DAGISEL-LABEL: divergent_value_i64:
20061998
; GFX1164DAGISEL: ; %bb.0: ; %entry
20071999
; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2008-
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0
2000+
; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], 0
20092001
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
2010-
; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0
20112002
; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
2003+
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
20122004
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s4, s[2:3]
2013-
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
20142005
; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v2, s4
20152006
; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v3, s4
20162007
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s4
@@ -2027,12 +2018,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
20272018
; GFX1164GISEL-LABEL: divergent_value_i64:
20282019
; GFX1164GISEL: ; %bb.0: ; %entry
20292020
; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2030-
; GFX1164GISEL-NEXT: s_mov_b32 s0, 0
2021+
; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], 0
20312022
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
2032-
; GFX1164GISEL-NEXT: s_mov_b32 s1, s0
20332023
; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
2024+
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
20342025
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s4, s[2:3]
2035-
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
20362026
; GFX1164GISEL-NEXT: v_readlane_b32 s5, v2, s4
20372027
; GFX1164GISEL-NEXT: v_readlane_b32 s6, v3, s4
20382028
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s4
@@ -2049,12 +2039,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
20492039
; GFX1132DAGISEL-LABEL: divergent_value_i64:
20502040
; GFX1132DAGISEL: ; %bb.0: ; %entry
20512041
; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2052-
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
2042+
; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], 0
20532043
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
2054-
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0
20552044
; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
2045+
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
20562046
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
2057-
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
20582047
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
20592048
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v3, s3
20602049
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
@@ -2070,12 +2059,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
20702059
; GFX1132GISEL-LABEL: divergent_value_i64:
20712060
; GFX1132GISEL: ; %bb.0: ; %entry
20722061
; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2073-
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
2062+
; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], 0
20742063
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
2075-
; GFX1132GISEL-NEXT: s_mov_b32 s1, s0
20762064
; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
2065+
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
20772066
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
2078-
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
20792067
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
20802068
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v3, s3
20812069
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll

Lines changed: 16 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -2036,9 +2036,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
20362036
; GFX8DAGISEL-LABEL: divergent_value_i64:
20372037
; GFX8DAGISEL: ; %bb.0: ; %entry
20382038
; GFX8DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039-
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
2039+
; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], 0
20402040
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
2041-
; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4
20422041
; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
20432042
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
20442043
; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v2, s8
@@ -2058,9 +2057,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
20582057
; GFX8GISEL-LABEL: divergent_value_i64:
20592058
; GFX8GISEL: ; %bb.0: ; %entry
20602059
; GFX8GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2061-
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
2060+
; GFX8GISEL-NEXT: s_mov_b64 s[4:5], 0
20622061
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
2063-
; GFX8GISEL-NEXT: s_mov_b32 s5, s4
20642062
; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
20652063
; GFX8GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
20662064
; GFX8GISEL-NEXT: v_readlane_b32 s9, v2, s8
@@ -2080,9 +2078,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
20802078
; GFX9DAGISEL-LABEL: divergent_value_i64:
20812079
; GFX9DAGISEL: ; %bb.0: ; %entry
20822080
; GFX9DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2083-
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
2081+
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], 0
20842082
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
2085-
; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4
20862083
; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
20872084
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
20882085
; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v2, s8
@@ -2102,9 +2099,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
21022099
; GFX9GISEL-LABEL: divergent_value_i64:
21032100
; GFX9GISEL: ; %bb.0: ; %entry
21042101
; GFX9GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2105-
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
2102+
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], 0
21062103
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
2107-
; GFX9GISEL-NEXT: s_mov_b32 s5, s4
21082104
; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
21092105
; GFX9GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
21102106
; GFX9GISEL-NEXT: v_readlane_b32 s9, v2, s8
@@ -2124,9 +2120,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
21242120
; GFX1064DAGISEL-LABEL: divergent_value_i64:
21252121
; GFX1064DAGISEL: ; %bb.0: ; %entry
21262122
; GFX1064DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2127-
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
2123+
; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], 0
21282124
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
2129-
; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4
21302125
; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
21312126
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
21322127
; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v2, s8
@@ -2145,9 +2140,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
21452140
; GFX1064GISEL-LABEL: divergent_value_i64:
21462141
; GFX1064GISEL: ; %bb.0: ; %entry
21472142
; GFX1064GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2148-
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
2143+
; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], 0
21492144
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
2150-
; GFX1064GISEL-NEXT: s_mov_b32 s5, s4
21512145
; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
21522146
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
21532147
; GFX1064GISEL-NEXT: v_readlane_b32 s9, v2, s8
@@ -2166,9 +2160,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
21662160
; GFX1032DAGISEL-LABEL: divergent_value_i64:
21672161
; GFX1032DAGISEL: ; %bb.0: ; %entry
21682162
; GFX1032DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2169-
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
2163+
; GFX1032DAGISEL-NEXT: s_mov_b64 s[4:5], 0
21702164
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
2171-
; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4
21722165
; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
21732166
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
21742167
; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
@@ -2187,9 +2180,8 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
21872180
; GFX1032GISEL-LABEL: divergent_value_i64:
21882181
; GFX1032GISEL: ; %bb.0: ; %entry
21892182
; GFX1032GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2190-
; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
2183+
; GFX1032GISEL-NEXT: s_mov_b64 s[4:5], 0
21912184
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
2192-
; GFX1032GISEL-NEXT: s_mov_b32 s5, s4
21932185
; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
21942186
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
21952187
; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7
@@ -2208,12 +2200,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
22082200
; GFX1164DAGISEL-LABEL: divergent_value_i64:
22092201
; GFX1164DAGISEL: ; %bb.0: ; %entry
22102202
; GFX1164DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2211-
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0
2203+
; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], 0
22122204
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
2213-
; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0
22142205
; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
2206+
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
22152207
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s4, s[2:3]
2216-
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
22172208
; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v2, s4
22182209
; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v3, s4
22192210
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s4
@@ -2230,12 +2221,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
22302221
; GFX1164GISEL-LABEL: divergent_value_i64:
22312222
; GFX1164GISEL: ; %bb.0: ; %entry
22322223
; GFX1164GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2233-
; GFX1164GISEL-NEXT: s_mov_b32 s0, 0
2224+
; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], 0
22342225
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
2235-
; GFX1164GISEL-NEXT: s_mov_b32 s1, s0
22362226
; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
2227+
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
22372228
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s4, s[2:3]
2238-
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
22392229
; GFX1164GISEL-NEXT: v_readlane_b32 s5, v2, s4
22402230
; GFX1164GISEL-NEXT: v_readlane_b32 s6, v3, s4
22412231
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s4
@@ -2252,12 +2242,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
22522242
; GFX1132DAGISEL-LABEL: divergent_value_i64:
22532243
; GFX1132DAGISEL: ; %bb.0: ; %entry
22542244
; GFX1132DAGISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2255-
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
2245+
; GFX1132DAGISEL-NEXT: s_mov_b64 s[0:1], 0
22562246
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
2257-
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0
22582247
; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
2248+
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
22592249
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
2260-
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
22612250
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
22622251
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v3, s3
22632252
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
@@ -2273,12 +2262,11 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
22732262
; GFX1132GISEL-LABEL: divergent_value_i64:
22742263
; GFX1132GISEL: ; %bb.0: ; %entry
22752264
; GFX1132GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2276-
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
2265+
; GFX1132GISEL-NEXT: s_mov_b64 s[0:1], 0
22772266
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
2278-
; GFX1132GISEL-NEXT: s_mov_b32 s1, s0
22792267
; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
2268+
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
22802269
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
2281-
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
22822270
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
22832271
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v3, s3
22842272
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3

0 commit comments

Comments
 (0)