Skip to content

Commit 83abf51

Browse files
committed
Inflate src reg for subreg inserts
Change-Id: I8562c6fae3b4aefd6ddbf3f6dbad18ffa0cf6331
1 parent b941f83 commit 83abf51

10 files changed

+8247
-6276
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -518,9 +518,10 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
518518
SrcIdx = DstSub;
519519
NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
520520
if (!NewRC) {
521-
auto SuperDstRC = MRI.getLargestConstrainedSuperClass(Dst);
522-
if (SuperDstRC != DstRC)
523-
NewRC = TRI.getMatchingSuperRegClass(SuperDstRC, SrcRC, DstSub);
521+
auto SuperSrcRC = MRI.getLargestConstrainedSuperClass(Src);
522+
if (SuperSrcRC != SrcRC) {
523+
NewRC = TRI.getMatchingSuperRegClass(DstRC, SuperSrcRC, DstSub);
524+
}
524525
}
525526
} else if (SrcSub) {
526527
// DstReg will be merged with a sub-register of SrcReg.

llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir

Lines changed: 63 additions & 93 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll

Lines changed: 30 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -160,22 +160,22 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac(<8 x bfloat> %arg0, <8 x b
160160
; GCN-LABEL: test_mfma_f32_32x32x16_bf16__mac:
161161
; GCN: ; %bb.0:
162162
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163-
; GCN-NEXT: v_accvgpr_write_b32 a0, v8
164-
; GCN-NEXT: v_accvgpr_write_b32 a1, v9
165-
; GCN-NEXT: v_accvgpr_write_b32 a2, v10
166-
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
167-
; GCN-NEXT: v_accvgpr_write_b32 a4, v12
168-
; GCN-NEXT: v_accvgpr_write_b32 a5, v13
169-
; GCN-NEXT: v_accvgpr_write_b32 a6, v14
170-
; GCN-NEXT: v_accvgpr_write_b32 a7, v15
171-
; GCN-NEXT: v_accvgpr_write_b32 a8, v16
172-
; GCN-NEXT: v_accvgpr_write_b32 a9, v17
173-
; GCN-NEXT: v_accvgpr_write_b32 a10, v18
174-
; GCN-NEXT: v_accvgpr_write_b32 a11, v19
175-
; GCN-NEXT: v_accvgpr_write_b32 a12, v20
176-
; GCN-NEXT: v_accvgpr_write_b32 a13, v21
177-
; GCN-NEXT: v_accvgpr_write_b32 a14, v22
178163
; GCN-NEXT: v_accvgpr_write_b32 a15, v23
164+
; GCN-NEXT: v_accvgpr_write_b32 a14, v22
165+
; GCN-NEXT: v_accvgpr_write_b32 a13, v21
166+
; GCN-NEXT: v_accvgpr_write_b32 a12, v20
167+
; GCN-NEXT: v_accvgpr_write_b32 a11, v19
168+
; GCN-NEXT: v_accvgpr_write_b32 a10, v18
169+
; GCN-NEXT: v_accvgpr_write_b32 a9, v17
170+
; GCN-NEXT: v_accvgpr_write_b32 a8, v16
171+
; GCN-NEXT: v_accvgpr_write_b32 a7, v15
172+
; GCN-NEXT: v_accvgpr_write_b32 a6, v14
173+
; GCN-NEXT: v_accvgpr_write_b32 a5, v13
174+
; GCN-NEXT: v_accvgpr_write_b32 a4, v12
175+
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
176+
; GCN-NEXT: v_accvgpr_write_b32 a2, v10
177+
; GCN-NEXT: v_accvgpr_write_b32 a1, v9
178+
; GCN-NEXT: v_accvgpr_write_b32 a0, v8
179179
; GCN-NEXT: s_nop 1
180180
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
181181
; GCN-NEXT: s_nop 7
@@ -205,22 +205,22 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac__flags(<8 x bfloat> %arg0,
205205
; GCN-LABEL: test_mfma_f32_32x32x16_bf16__mac__flags:
206206
; GCN: ; %bb.0:
207207
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
208-
; GCN-NEXT: v_accvgpr_write_b32 a0, v8
209-
; GCN-NEXT: v_accvgpr_write_b32 a1, v9
210-
; GCN-NEXT: v_accvgpr_write_b32 a2, v10
211-
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
212-
; GCN-NEXT: v_accvgpr_write_b32 a4, v12
213-
; GCN-NEXT: v_accvgpr_write_b32 a5, v13
214-
; GCN-NEXT: v_accvgpr_write_b32 a6, v14
215-
; GCN-NEXT: v_accvgpr_write_b32 a7, v15
216-
; GCN-NEXT: v_accvgpr_write_b32 a8, v16
217-
; GCN-NEXT: v_accvgpr_write_b32 a9, v17
218-
; GCN-NEXT: v_accvgpr_write_b32 a10, v18
219-
; GCN-NEXT: v_accvgpr_write_b32 a11, v19
220-
; GCN-NEXT: v_accvgpr_write_b32 a12, v20
221-
; GCN-NEXT: v_accvgpr_write_b32 a13, v21
222-
; GCN-NEXT: v_accvgpr_write_b32 a14, v22
223208
; GCN-NEXT: v_accvgpr_write_b32 a15, v23
209+
; GCN-NEXT: v_accvgpr_write_b32 a14, v22
210+
; GCN-NEXT: v_accvgpr_write_b32 a13, v21
211+
; GCN-NEXT: v_accvgpr_write_b32 a12, v20
212+
; GCN-NEXT: v_accvgpr_write_b32 a11, v19
213+
; GCN-NEXT: v_accvgpr_write_b32 a10, v18
214+
; GCN-NEXT: v_accvgpr_write_b32 a9, v17
215+
; GCN-NEXT: v_accvgpr_write_b32 a8, v16
216+
; GCN-NEXT: v_accvgpr_write_b32 a7, v15
217+
; GCN-NEXT: v_accvgpr_write_b32 a6, v14
218+
; GCN-NEXT: v_accvgpr_write_b32 a5, v13
219+
; GCN-NEXT: v_accvgpr_write_b32 a4, v12
220+
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
221+
; GCN-NEXT: v_accvgpr_write_b32 a2, v10
222+
; GCN-NEXT: v_accvgpr_write_b32 a1, v9
223+
; GCN-NEXT: v_accvgpr_write_b32 a0, v8
224224
; GCN-NEXT: s_nop 1
225225
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
226226
; GCN-NEXT: s_nop 7

0 commit comments

Comments
 (0)