Skip to content

Commit 3b7c371

Browse files
jmmartinez authored and David Salinas committed
[AMDGPU][SelectionDAG] Use COPY instead of S_MOV_B32 to assign values to M0 (llvm#132957)
This is consistent with what's done on GISel. This allows the register coalescer to remove the redundant intermediate `s_mov_b32` instructions by using `m0` directly as the result register.

Change-Id: I7f4e19115ff2f95140c209049505cecaed90e4d5
1 parent 2199fbd commit 3b7c371

12 files changed

+154
-274
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4614,7 +4614,7 @@ emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI,
46144614
} else {
46154615
// Move index from VCC into M0
46164616
if (Offset == 0) {
4617-
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
4617+
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
46184618
.addReg(CurrentIdxReg, RegState::Kill);
46194619
} else {
46204620
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
@@ -4728,7 +4728,7 @@ static void setM0ToIndexFromSGPR(const SIInstrInfo *TII,
47284728

47294729
if (Offset == 0) {
47304730
// clang-format off
4731-
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
4731+
BuildMI(*MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
47324732
.add(*Idx);
47334733
// clang-format on
47344734
} else {
@@ -5323,9 +5323,11 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
53235323
return BB;
53245324
}
53255325
case AMDGPU::SI_INIT_M0: {
5326+
MachineOperand &M0Init = MI.getOperand(0);
53265327
BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
5327-
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
5328-
.add(MI.getOperand(0));
5328+
TII->get(M0Init.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32),
5329+
AMDGPU::M0)
5330+
.add(M0Init);
53295331
MI.eraseFromParent();
53305332
return BB;
53315333
}

llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -301,12 +301,11 @@ define amdgpu_kernel void @double8_extelt(ptr addrspace(1) %out, i32 %sel) {
301301
; GCN-NEXT: s_mov_b32 s10, s0
302302
; GCN-NEXT: s_mov_b32 s12, s0
303303
; GCN-NEXT: s_mov_b32 s14, s0
304-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
305-
; GCN-NEXT: s_lshl_b32 s18, s18, 1
306304
; GCN-NEXT: v_mov_b32_e32 v0, s0
307305
; GCN-NEXT: v_mov_b32_e32 v1, s1
308306
; GCN-NEXT: v_mov_b32_e32 v15, s15
309-
; GCN-NEXT: s_mov_b32 m0, s18
307+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
308+
; GCN-NEXT: s_lshl_b32 m0, s18, 1
310309
; GCN-NEXT: v_mov_b32_e32 v2, s2
311310
; GCN-NEXT: v_mov_b32_e32 v3, s3
312311
; GCN-NEXT: v_mov_b32_e32 v4, s4
@@ -352,11 +351,10 @@ define amdgpu_kernel void @double7_extelt(ptr addrspace(1) %out, i32 %sel) {
352351
; GCN-NEXT: s_mov_b32 s10, s0
353352
; GCN-NEXT: s_mov_b32 s12, s0
354353
; GCN-NEXT: s_waitcnt lgkmcnt(0)
355-
; GCN-NEXT: s_lshl_b32 s16, s16, 1
356354
; GCN-NEXT: v_mov_b32_e32 v0, s0
357355
; GCN-NEXT: v_mov_b32_e32 v1, s1
358356
; GCN-NEXT: v_mov_b32_e32 v15, s15
359-
; GCN-NEXT: s_mov_b32 m0, s16
357+
; GCN-NEXT: s_lshl_b32 m0, s16, 1
360358
; GCN-NEXT: v_mov_b32_e32 v2, s2
361359
; GCN-NEXT: v_mov_b32_e32 v3, s3
362360
; GCN-NEXT: v_mov_b32_e32 v4, s4
@@ -451,12 +449,11 @@ define amdgpu_kernel void @double15_extelt(ptr addrspace(1) %out, i32 %sel) {
451449
; GCN-NEXT: s_mov_b32 s60, s36
452450
; GCN-NEXT: s_mov_b32 s62, s36
453451
; GCN-NEXT: s_mov_b32 s64, s36
454-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
455-
; GCN-NEXT: s_lshl_b32 s2, s2, 1
456452
; GCN-NEXT: v_mov_b32_e32 v0, s36
457453
; GCN-NEXT: v_mov_b32_e32 v1, s37
458454
; GCN-NEXT: v_mov_b32_e32 v31, s67
459-
; GCN-NEXT: s_mov_b32 m0, s2
455+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
456+
; GCN-NEXT: s_lshl_b32 m0, s2, 1
460457
; GCN-NEXT: v_mov_b32_e32 v2, s38
461458
; GCN-NEXT: v_mov_b32_e32 v3, s39
462459
; GCN-NEXT: v_mov_b32_e32 v4, s40
@@ -535,12 +532,11 @@ define amdgpu_kernel void @double16_extelt(ptr addrspace(1) %out, i32 %sel) {
535532
; GCN-NEXT: s_mov_b32 s62, s36
536533
; GCN-NEXT: s_mov_b32 s64, s36
537534
; GCN-NEXT: s_mov_b32 s66, s36
538-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
539-
; GCN-NEXT: s_lshl_b32 s2, s2, 1
540535
; GCN-NEXT: v_mov_b32_e32 v0, s36
541536
; GCN-NEXT: v_mov_b32_e32 v1, s37
542537
; GCN-NEXT: v_mov_b32_e32 v31, s67
543-
; GCN-NEXT: s_mov_b32 m0, s2
538+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
539+
; GCN-NEXT: s_lshl_b32 m0, s2, 1
544540
; GCN-NEXT: v_mov_b32_e32 v2, s38
545541
; GCN-NEXT: v_mov_b32_e32 v3, s39
546542
; GCN-NEXT: v_mov_b32_e32 v4, s40

0 commit comments

Comments
 (0)