Skip to content

Commit 3a7d14a

Browse files
authored
AMDGPU: Avoid using exact class check in reg_sequence AGPR fold (#156135)
This does better in cases which mix align2 and non-align2 classes.
1 parent 1e786fb commit 3a7d14a

File tree

2 files changed

+8
-11
lines changed

2 files changed

+8
-11
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1940,8 +1940,10 @@ bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const {
19401940
// Direct copy from SGPR to AGPR is not possible on gfx908. To avoid
19411941
// creation of exploded copies SGPR->VGPR->AGPR in the copyPhysReg()
19421942
// later, create a copy here and track if we already have such a copy.
1943-
if (TRI->getSubRegisterClass(MRI->getRegClass(Src.Reg), Src.SubReg) !=
1944-
VGPRUseSubRC) {
1943+
const TargetRegisterClass *SubRC =
1944+
TRI->getSubRegisterClass(MRI->getRegClass(Src.Reg), Src.SubReg);
1945+
if (!VGPRUseSubRC->hasSubClassEq(SubRC)) {
1946+
// TODO: Try to reconstrain class
19451947
VGPRCopy = MRI->createVirtualRegister(VGPRUseSubRC);
19461948
BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::COPY), VGPRCopy).add(*Def);
19471949
B.addReg(VGPRCopy);

llvm/test/CodeGen/AMDGPU/si-fold-operands-agpr-copy-reg-sequence.mir

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,7 @@ body: |
308308
; CHECK-LABEL: name: s_mov_b64_0_copy_vgpr_reg_sequence_128_splat_copy_to_agpr_elt64
309309
; CHECK: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
310310
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B]], %subreg.sub0_sub1, [[V_MOV_B]], %subreg.sub2_sub3
311-
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[V_MOV_B]]
312-
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2_sub3
311+
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_MOV_B]], %subreg.sub0_sub1, [[V_MOV_B]], %subreg.sub2_sub3
313312
; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE1]]
314313
; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
315314
%0:sreg_64 = S_MOV_B64 0
@@ -329,8 +328,7 @@ body: |
329328
; CHECK-LABEL: name: s_mov_b64_0_copy_vgpr_reg_sequence_128_splat_copy_to_agpr_subreg_elt32
330329
; CHECK: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
331330
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B]].sub0, %subreg.sub0, [[V_MOV_B]].sub1, %subreg.sub1, [[V_MOV_B]], %subreg.sub1_sub2
332-
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[V_MOV_B]]
333-
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_MOV_B]].sub0, %subreg.sub0, [[V_MOV_B]].sub1, %subreg.sub1, [[COPY]], %subreg.sub1_sub2
331+
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_MOV_B]].sub0, %subreg.sub0, [[V_MOV_B]].sub1, %subreg.sub1, [[V_MOV_B]], %subreg.sub1_sub2
334332
; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE1]]
335333
; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
336334
%0:sreg_64 = S_MOV_B64 0
@@ -354,9 +352,7 @@ body: |
354352
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
355353
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
356354
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[V_MOV_B]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2_sub3
357-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[V_MOV_B]]
358-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]]
359-
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
355+
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_MOV_B]], %subreg.sub0_sub1, [[COPY]], %subreg.sub2_sub3
360356
; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE1]]
361357
; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
362358
%0:sreg_64 = S_MOV_B64 0
@@ -381,8 +377,7 @@ body: |
381377
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9
382378
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[COPY]]
383379
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3
384-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY1]]
385-
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
380+
; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY1]], %subreg.sub2_sub3
386381
; CHECK-NEXT: $agpr0_agpr1_agpr2_agpr3 = COPY [[REG_SEQUENCE1]]
387382
; CHECK-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
388383
%0:sreg_64 = COPY $sgpr8_sgpr9

0 commit comments

Comments
 (0)