Skip to content

Commit 2f8d699

Browse files
authored
[AMDGPU][SelectionDAG] Use COPY instead of S_MOV_B32 to assign values to M0 (#132957)
This change is consistent with what GlobalISel (GISel) already does. Using a `COPY` allows the register coalescer to remove the redundant intermediate `s_mov_b32` instructions by using `m0` directly as the result register.
1 parent 53fa289 commit 2f8d699

12 files changed

+154
-274
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4691,7 +4691,7 @@ emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI,
46914691
} else {
46924692
// Move index from VCC into M0
46934693
if (Offset == 0) {
4694-
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
4694+
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
46954695
.addReg(CurrentIdxReg, RegState::Kill);
46964696
} else {
46974697
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
@@ -4805,7 +4805,7 @@ static void setM0ToIndexFromSGPR(const SIInstrInfo *TII,
48054805

48064806
if (Offset == 0) {
48074807
// clang-format off
4808-
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
4808+
BuildMI(*MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
48094809
.add(*Idx);
48104810
// clang-format on
48114811
} else {
@@ -5400,9 +5400,11 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
54005400
return BB;
54015401
}
54025402
case AMDGPU::SI_INIT_M0: {
5403+
MachineOperand &M0Init = MI.getOperand(0);
54035404
BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
5404-
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
5405-
.add(MI.getOperand(0));
5405+
TII->get(M0Init.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32),
5406+
AMDGPU::M0)
5407+
.add(M0Init);
54065408
MI.eraseFromParent();
54075409
return BB;
54085410
}

llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -301,12 +301,11 @@ define amdgpu_kernel void @double8_extelt(ptr addrspace(1) %out, i32 %sel) {
301301
; GCN-NEXT: s_mov_b32 s10, s0
302302
; GCN-NEXT: s_mov_b32 s12, s0
303303
; GCN-NEXT: s_mov_b32 s14, s0
304-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
305-
; GCN-NEXT: s_lshl_b32 s18, s18, 1
306304
; GCN-NEXT: v_mov_b32_e32 v0, s0
307305
; GCN-NEXT: v_mov_b32_e32 v1, s1
308306
; GCN-NEXT: v_mov_b32_e32 v15, s15
309-
; GCN-NEXT: s_mov_b32 m0, s18
307+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
308+
; GCN-NEXT: s_lshl_b32 m0, s18, 1
310309
; GCN-NEXT: v_mov_b32_e32 v2, s2
311310
; GCN-NEXT: v_mov_b32_e32 v3, s3
312311
; GCN-NEXT: v_mov_b32_e32 v4, s4
@@ -352,11 +351,10 @@ define amdgpu_kernel void @double7_extelt(ptr addrspace(1) %out, i32 %sel) {
352351
; GCN-NEXT: s_mov_b32 s10, s0
353352
; GCN-NEXT: s_mov_b32 s12, s0
354353
; GCN-NEXT: s_waitcnt lgkmcnt(0)
355-
; GCN-NEXT: s_lshl_b32 s16, s16, 1
356354
; GCN-NEXT: v_mov_b32_e32 v0, s0
357355
; GCN-NEXT: v_mov_b32_e32 v1, s1
358356
; GCN-NEXT: v_mov_b32_e32 v15, s15
359-
; GCN-NEXT: s_mov_b32 m0, s16
357+
; GCN-NEXT: s_lshl_b32 m0, s16, 1
360358
; GCN-NEXT: v_mov_b32_e32 v2, s2
361359
; GCN-NEXT: v_mov_b32_e32 v3, s3
362360
; GCN-NEXT: v_mov_b32_e32 v4, s4
@@ -451,12 +449,11 @@ define amdgpu_kernel void @double15_extelt(ptr addrspace(1) %out, i32 %sel) {
451449
; GCN-NEXT: s_mov_b32 s60, s36
452450
; GCN-NEXT: s_mov_b32 s62, s36
453451
; GCN-NEXT: s_mov_b32 s64, s36
454-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
455-
; GCN-NEXT: s_lshl_b32 s2, s2, 1
456452
; GCN-NEXT: v_mov_b32_e32 v0, s36
457453
; GCN-NEXT: v_mov_b32_e32 v1, s37
458454
; GCN-NEXT: v_mov_b32_e32 v31, s67
459-
; GCN-NEXT: s_mov_b32 m0, s2
455+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
456+
; GCN-NEXT: s_lshl_b32 m0, s2, 1
460457
; GCN-NEXT: v_mov_b32_e32 v2, s38
461458
; GCN-NEXT: v_mov_b32_e32 v3, s39
462459
; GCN-NEXT: v_mov_b32_e32 v4, s40
@@ -535,12 +532,11 @@ define amdgpu_kernel void @double16_extelt(ptr addrspace(1) %out, i32 %sel) {
535532
; GCN-NEXT: s_mov_b32 s62, s36
536533
; GCN-NEXT: s_mov_b32 s64, s36
537534
; GCN-NEXT: s_mov_b32 s66, s36
538-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
539-
; GCN-NEXT: s_lshl_b32 s2, s2, 1
540535
; GCN-NEXT: v_mov_b32_e32 v0, s36
541536
; GCN-NEXT: v_mov_b32_e32 v1, s37
542537
; GCN-NEXT: v_mov_b32_e32 v31, s67
543-
; GCN-NEXT: s_mov_b32 m0, s2
538+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
539+
; GCN-NEXT: s_lshl_b32 m0, s2, 1
544540
; GCN-NEXT: v_mov_b32_e32 v2, s38
545541
; GCN-NEXT: v_mov_b32_e32 v3, s39
546542
; GCN-NEXT: v_mov_b32_e32 v4, s40

0 commit comments

Comments
 (0)