Skip to content

Commit 1ff6bfe

Browse files
authored
AMDGPU: Add VS_64_Align2 class (#156132)
We need an aligned version of the VS class to properly represent operand constraints. This fixes regressions with #155559
1 parent e591df6 commit 1ff6bfe

13 files changed

+165
-152
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3944,6 +3944,8 @@ bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
39443944
return RC.hasSuperClassEq(
39453945
getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
39463946

3947+
assert(&RC != &AMDGPU::VS_64RegClass);
3948+
39473949
return true;
39483950
}
39493951

@@ -3956,6 +3958,9 @@ SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const {
39563958
if (Size <= 32)
39573959
return RC;
39583960

3961+
if (RC == &AMDGPU::VS_64RegClass)
3962+
return &AMDGPU::VS_64_Align2RegClass;
3963+
39593964
if (isVGPRClass(RC))
39603965
return getAlignedVGPRClassForBitWidth(Size);
39613966
if (isAGPRClass(RC))

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,6 +1107,14 @@ def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64, SReg_6
11071107
let Size = 64;
11081108
}
11091109

1110+
def VS_64_Align2 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
1111+
(add VReg_64_Align2, SReg_64)> {
1112+
let isAllocatable = 0;
1113+
let HasVGPR = 1;
1114+
let HasSGPR = 1;
1115+
let Size = 64;
1116+
}
1117+
11101118
def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
11111119
let HasVGPR = 1;
11121120
let HasAGPR = 1;

llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir

Lines changed: 96 additions & 96 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ body: |
486486
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
487487
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
488488
; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
489-
; CHECK-NEXT: INLINEASM &"; use $0 ", 1 /* sideeffect attdialect */, 27983881 /* reguse:VReg_512_Align2 */, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
489+
; CHECK-NEXT: INLINEASM &"; use $0 ", 1 /* sideeffect attdialect */, 28114953 /* reguse:VReg_512_Align2 */, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
490490
; CHECK-NEXT: S_ENDPGM 0
491491
bb.0:
492492
S_NOP 0, implicit-def $agpr0
@@ -516,7 +516,7 @@ body: |
516516
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
517517
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
518518
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
519-
INLINEASM &"; use $0 ", 1 /* sideeffect attdialect */, 27983881 /* reguse:VReg_512_Align2 */, %0:vreg_512_align2
519+
INLINEASM &"; use $0 ", 1 /* sideeffect attdialect */, 28114953 /* reguse:VReg_512_Align2 */, %0:vreg_512_align2
520520
S_ENDPGM 0
521521
522522
...
@@ -1368,7 +1368,7 @@ body: |
13681368
; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
13691369
; CHECK-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
13701370
; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
1371-
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 27983881 /* reguse:VReg_512_Align2 */, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
1371+
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 28114953 /* reguse:VReg_512_Align2 */, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
13721372
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
13731373
; CHECK-NEXT: S_BRANCH %bb.2
13741374
; CHECK-NEXT: {{ $}}
@@ -1408,7 +1408,7 @@ body: |
14081408
undef %2.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
14091409
early-clobber %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %2, 0, 0, 0, implicit $mode, implicit $exec
14101410
early-clobber %4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
1411-
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 27983881 /* reguse:VReg_512_Align2 */, %4
1411+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 28114953 /* reguse:VReg_512_Align2 */, %4
14121412
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
14131413
S_BRANCH %bb.2
14141414
@@ -1726,7 +1726,7 @@ body: |
17261726
; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
17271727
; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
17281728
; CHECK-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
1729-
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 27983881 /* reguse:VReg_512_Align2 */, renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33
1729+
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 28114953 /* reguse:VReg_512_Align2 */, renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33
17301730
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
17311731
; CHECK-NEXT: S_BRANCH %bb.2
17321732
; CHECK-NEXT: {{ $}}
@@ -1763,7 +1763,7 @@ body: |
17631763
undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
17641764
%0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
17651765
%4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
1766-
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 27983881 /* reguse:VReg_512_Align2 */, %4
1766+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 28114953 /* reguse:VReg_512_Align2 */, %4
17671767
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
17681768
S_BRANCH %bb.2
17691769

llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,16 @@
88
define amdgpu_kernel void @s_input_output_i128() {
99
; GFX908-LABEL: name: s_input_output_i128
1010
; GFX908: bb.0 (%ir-block.0):
11-
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7208970 /* regdef:SGPR_128 */, def %13
11+
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7340042 /* regdef:SGPR_128 */, def %13
1212
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %13
13-
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7208969 /* reguse:SGPR_128 */, %14
13+
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7340041 /* reguse:SGPR_128 */, [[COPY]]
1414
; GFX908-NEXT: S_ENDPGM 0
1515
;
1616
; GFX90A-LABEL: name: s_input_output_i128
1717
; GFX90A: bb.0 (%ir-block.0):
18-
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7208970 /* regdef:SGPR_128 */, def %11
18+
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7340042 /* regdef:SGPR_128 */, def %11
1919
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %11
20-
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7208969 /* reguse:SGPR_128 */, %12
20+
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7340041 /* reguse:SGPR_128 */, [[COPY]]
2121
; GFX90A-NEXT: S_ENDPGM 0
2222
%val = tail call i128 asm sideeffect "; def $0", "=s"()
2323
call void asm sideeffect "; use $0", "s"(i128 %val)
@@ -27,36 +27,36 @@ define amdgpu_kernel void @s_input_output_i128() {
2727
define amdgpu_kernel void @v_input_output_i128() {
2828
; GFX908-LABEL: name: v_input_output_i128
2929
; GFX908: bb.0 (%ir-block.0):
30-
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %13
30+
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128 */, def %13
3131
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %13
32-
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6094857 /* reguse:VReg_128 */, %14
32+
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6225929 /* reguse:VReg_128 */, [[COPY]]
3333
; GFX908-NEXT: S_ENDPGM 0
3434
;
3535
; GFX90A-LABEL: name: v_input_output_i128
3636
; GFX90A: bb.0 (%ir-block.0):
37-
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %11
37+
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6553610 /* regdef:VReg_128_Align2 */, def %11
3838
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %11
39-
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6422537 /* reguse:VReg_128_Align2 */, %12
39+
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6553609 /* reguse:VReg_128_Align2 */, [[COPY]]
4040
; GFX90A-NEXT: S_ENDPGM 0
4141
%val = tail call i128 asm sideeffect "; def $0", "=v"()
4242
call void asm sideeffect "; use $0", "v"(i128 %val)
4343
ret void
4444
}
4545

4646
define amdgpu_kernel void @a_input_output_i128() {
47+
4748
; GFX908-LABEL: name: a_input_output_i128
4849
; GFX908: bb.0 (%ir-block.0):
49-
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6029322 /* regdef:AReg_128 */, def %13
50+
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6160394 /* regdef:AReg_128 */, def %13
5051
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %13
51-
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6029321 /* reguse:AReg_128 */, %14
52-
52+
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6160393 /* reguse:AReg_128 */, [[COPY]]
5353
; GFX908-NEXT: S_ENDPGM 0
5454
;
5555
; GFX90A-LABEL: name: a_input_output_i128
5656
; GFX90A: bb.0 (%ir-block.0):
57-
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:AReg_128_Align2 */, def %11
57+
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:AReg_128_Align2 */, def %11
5858
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %11
59-
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6291465 /* reguse:AReg_128_Align2 */, %12
59+
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6422537 /* reguse:AReg_128_Align2 */, [[COPY]]
6060
; GFX90A-NEXT: S_ENDPGM 0
6161
%val = call i128 asm sideeffect "; def $0", "=a"()
6262
call void asm sideeffect "; use $0", "a"(i128 %val)

llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
1212
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
1313
; REGALLOC-GFX908-NEXT: {{ $}}
1414
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %6:agpr_32
15-
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %25
15+
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128 */, def %25
1616
; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %25
1717
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %27
1818
; REGALLOC-GFX908-NEXT: SI_SPILL_AV64_SAVE %27, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -37,7 +37,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
3737
; PEI-GFX908-NEXT: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
3838
; PEI-GFX908-NEXT: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
3939
; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef renamable $agpr0
40-
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
40+
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6225930 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
4141
; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
4242
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
4343
; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
@@ -61,9 +61,9 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
6161
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
6262
; REGALLOC-GFX90A-NEXT: {{ $}}
6363
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %6:agpr_32
64-
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %23
64+
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6553610 /* regdef:VReg_128_Align2 */, def %23
6565
; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %23
66-
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %21
66+
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3997706 /* regdef:VReg_64_Align2 */, def %21
6767
; REGALLOC-GFX90A-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY %21
6868
; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64_align2, [[COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
6969
; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
@@ -80,9 +80,9 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
8080
; PEI-GFX90A-NEXT: liveins: $sgpr4_sgpr5
8181
; PEI-GFX90A-NEXT: {{ $}}
8282
; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef renamable $agpr0
83-
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
83+
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6553610 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
8484
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
85-
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def renamable $vgpr2_vgpr3
85+
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3997706 /* regdef:VReg_64_Align2 */, def renamable $vgpr2_vgpr3
8686
; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
8787
; PEI-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
8888
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec

llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,13 @@ body: |
4545
4646
INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $agpr0
4747
%14:vgpr_32 = COPY killed $agpr0
48-
INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 27262986 /* regdef:VReg_512 */, def %7, 13565962 /* regdef:VReg_256 */, def %8, 6094858 /* regdef:VReg_128 */, def %9, 4784138 /* regdef:VReg_96 */, def %10, 4784138 /* regdef:VReg_96 */, def %11
48+
INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 27394058 /* regdef:VReg_512 */, def %7, 13697034 /* regdef:VReg_256 */, def %8, 6225930 /* regdef:VReg_128 */, def %9, 4915210 /* regdef:VReg_96 */, def %10, 4915210 /* regdef:VReg_96 */, def %11
4949
INLINEASM &"; clobber", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 12 /* clobber */, implicit-def dead early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
50-
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 27262985 /* reguse:VReg_512 */, %7
51-
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 13565961 /* reguse:VReg_256 */, %8
52-
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6094857 /* reguse:VReg_128 */, %9
53-
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:VReg_96 */, %10
54-
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:VReg_96 */, %11
50+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 27394057 /* reguse:VReg_512 */, %7
51+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 13697033 /* reguse:VReg_256 */, %8
52+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 6225929 /* reguse:VReg_128 */, %9
53+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:VReg_96 */, %10
54+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:VReg_96 */, %11
5555
$agpr1 = COPY %14
5656
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, killed $agpr1
5757
SI_RETURN

llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,15 @@ body: |
4343
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
4444
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
4545
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
46-
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3866633 /* reguse:VReg_64_Align2 */, [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]]
46+
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3997705 /* reguse:VReg_64_Align2 */, [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]]
4747
; CHECK-NEXT: SI_RETURN
4848
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
4949
%1:av_64_align2 = COPY $vgpr0_vgpr1
5050
%2:av_64_align2 = COPY $vgpr2_vgpr3
5151
%3:areg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1)
5252
%4:vreg_128_align2 = COPY %3
5353
%5:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %4.sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
54-
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3866633 /* reguse:VReg_64_Align2 */, %5
54+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3997705 /* reguse:VReg_64_Align2 */, %5
5555
SI_RETURN
5656
...
5757

0 commit comments

Comments
 (0)