Skip to content

Commit aae4a04

Browse files
committed
[AMDGPU] Add aperture classes to VS_64
Should not do anything.
1 parent 76efbc0 commit aae4a04

11 files changed

+174
-172
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,15 +1131,16 @@ def VS_32_Lo256 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2
11311131
let Size = 32;
11321132
}
11331133

1134-
def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64, SReg_64)> {
1134+
def VS_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
1135+
(add VReg_64, SReg_64_Encodable)> {
11351136
let isAllocatable = 0;
11361137
let HasVGPR = 1;
11371138
let HasSGPR = 1;
11381139
let Size = 64;
11391140
}
11401141

11411142
def VS_64_Align2 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
1142-
(add VReg_64_Align2, SReg_64)> {
1143+
(add VReg_64_Align2, SReg_64_Encodable)> {
11431144
let isAllocatable = 0;
11441145
let HasVGPR = 1;
11451146
let HasSGPR = 1;
@@ -1153,7 +1154,8 @@ def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_3
11531154
let Size = 32;
11541155
}
11551156

1156-
def VS_64_Lo256 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32, (add VReg_64_Lo256_Align2, SReg_64)> {
1157+
def VS_64_Lo256 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
1158+
(add VReg_64_Lo256_Align2, SReg_64_Encodable)> {
11571159
let isAllocatable = 0;
11581160
let HasVGPR = 1;
11591161
let HasSGPR = 1;

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ define float @test_multiple_register_outputs_same() #0 {
136136
define double @test_multiple_register_outputs_mixed() #0 {
137137
; CHECK-LABEL: name: test_multiple_register_outputs_mixed
138138
; CHECK: bb.1 (%ir-block.0):
139-
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %8, 3670026 /* regdef:VReg_64 */, def %9
139+
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 2031626 /* regdef:VGPR_32 */, def %8, 3735562 /* regdef:VReg_64 */, def %9
140140
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8
141141
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %9
142142
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)

llvm/test/CodeGen/AMDGPU/coalesce-copy-to-agpr-to-av-registers.mir

Lines changed: 115 additions & 115 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/inflate-reg-class-vgpr-mfma-to-av-with-load-source.mir

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ body: |
486486
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
487487
; CHECK-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
488488
; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
489-
; CHECK-NEXT: INLINEASM &"; use $0 ", 1 /* sideeffect attdialect */, 38797321 /* reguse:VReg_512_Align2 */, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
489+
; CHECK-NEXT: INLINEASM &"; use $0 ", 1 /* sideeffect attdialect */, 39190537 /* reguse:VReg_512_Align2 */, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
490490
; CHECK-NEXT: S_ENDPGM 0
491491
bb.0:
492492
S_NOP 0, implicit-def $agpr0
@@ -516,7 +516,7 @@ body: |
516516
S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
517517
S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
518518
S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
519-
INLINEASM &"; use $0 ", 1 /* sideeffect attdialect */, 38797321 /* reguse:VReg_512_Align2 */, %0:vreg_512_align2
519+
INLINEASM &"; use $0 ", 1 /* sideeffect attdialect */, 39190537 /* reguse:VReg_512_Align2 */, %0:vreg_512_align2
520520
S_ENDPGM 0
521521
522522
...
@@ -1368,7 +1368,7 @@ body: |
13681368
; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
13691369
; CHECK-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
13701370
; CHECK-NEXT: early-clobber renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
1371-
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 38797321 /* reguse:VReg_512_Align2 */, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
1371+
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 39190537 /* reguse:VReg_512_Align2 */, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
13721372
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
13731373
; CHECK-NEXT: S_BRANCH %bb.2
13741374
; CHECK-NEXT: {{ $}}
@@ -1408,7 +1408,7 @@ body: |
14081408
undef %2.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
14091409
early-clobber %0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %2, 0, 0, 0, implicit $mode, implicit $exec
14101410
early-clobber %4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
1411-
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 38797321 /* reguse:VReg_512_Align2 */, %4
1411+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 39190537 /* reguse:VReg_512_Align2 */, %4
14121412
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
14131413
S_BRANCH %bb.2
14141414
@@ -1726,7 +1726,7 @@ body: |
17261726
; CHECK-NEXT: renamable $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s64), addrspace 1)
17271727
; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
17281728
; CHECK-NEXT: early-clobber renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33 = V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr16_vgpr17, $vgpr16_vgpr17, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
1729-
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 38797321 /* reguse:VReg_512_Align2 */, renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33
1729+
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 39190537 /* reguse:VReg_512_Align2 */, renamable $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33
17301730
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
17311731
; CHECK-NEXT: S_BRANCH %bb.2
17321732
; CHECK-NEXT: {{ $}}
@@ -1763,7 +1763,7 @@ body: |
17631763
undef %0.sub0_sub1:vreg_512_align2 = GLOBAL_LOAD_DWORDX2 undef %3:vreg_64_align2, 0, 0, implicit $exec :: (load (s64), addrspace 1)
17641764
%0:vreg_512_align2 = V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
17651765
%4:vreg_512_align2 = V_MFMA_F32_32X32X8F16_vgprcd_e64 %1, %1, %0, 0, 0, 0, implicit $mode, implicit $exec
1766-
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 38797321 /* reguse:VReg_512_Align2 */, %4
1766+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 39190537 /* reguse:VReg_512_Align2 */, %4
17671767
S_CBRANCH_VCCNZ %bb.1, implicit $vcc
17681768
S_BRANCH %bb.2
17691769

llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,16 @@
88
define amdgpu_kernel void @s_input_output_i128() {
99
; GFX908-LABEL: name: s_input_output_i128
1010
; GFX908: bb.0 (%ir-block.0):
11-
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 9240586 /* regdef:SGPR_128 */, def %13
11+
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 9633802 /* regdef:SGPR_128 */, def %13
1212
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %13
13-
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9240585 /* reguse:SGPR_128 */, [[COPY]]
13+
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9633801 /* reguse:SGPR_128 */, [[COPY]]
1414
; GFX908-NEXT: S_ENDPGM 0
1515
;
1616
; GFX90A-LABEL: name: s_input_output_i128
1717
; GFX90A: bb.0 (%ir-block.0):
18-
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 9240586 /* regdef:SGPR_128 */, def %11
18+
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 9633802 /* regdef:SGPR_128 */, def %11
1919
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %11
20-
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9240585 /* reguse:SGPR_128 */, [[COPY]]
20+
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9633801 /* reguse:SGPR_128 */, [[COPY]]
2121
; GFX90A-NEXT: S_ENDPGM 0
2222
%val = tail call i128 asm sideeffect "; def $0", "=s"()
2323
call void asm sideeffect "; use $0", "s"(i128 %val)
@@ -27,16 +27,16 @@ define amdgpu_kernel void @s_input_output_i128() {
2727
define amdgpu_kernel void @v_input_output_i128() {
2828
; GFX908-LABEL: name: v_input_output_i128
2929
; GFX908: bb.0 (%ir-block.0):
30-
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7405578 /* regdef:VReg_128 */, def %13
30+
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7798794 /* regdef:VReg_128 */, def %13
3131
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %13
32-
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7405577 /* reguse:VReg_128 */, [[COPY]]
32+
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7798793 /* reguse:VReg_128 */, [[COPY]]
3333
; GFX908-NEXT: S_ENDPGM 0
3434
;
3535
; GFX90A-LABEL: name: v_input_output_i128
3636
; GFX90A: bb.0 (%ir-block.0):
37-
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7602186 /* regdef:VReg_128_Align2 */, def %11
37+
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7995402 /* regdef:VReg_128_Align2 */, def %11
3838
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %11
39-
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7602185 /* reguse:VReg_128_Align2 */, [[COPY]]
39+
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7995401 /* reguse:VReg_128_Align2 */, [[COPY]]
4040
; GFX90A-NEXT: S_ENDPGM 0
4141
%val = tail call i128 asm sideeffect "; def $0", "=v"()
4242
call void asm sideeffect "; use $0", "v"(i128 %val)
@@ -47,16 +47,16 @@ define amdgpu_kernel void @a_input_output_i128() {
4747

4848
; GFX908-LABEL: name: a_input_output_i128
4949
; GFX908: bb.0 (%ir-block.0):
50-
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7929866 /* regdef:AReg_128 */, def %13
50+
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 8323082 /* regdef:AReg_128 */, def %13
5151
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %13
52-
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 7929865 /* reguse:AReg_128 */, [[COPY]]
52+
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 8323081 /* reguse:AReg_128 */, [[COPY]]
5353
; GFX908-NEXT: S_ENDPGM 0
5454
;
5555
; GFX90A-LABEL: name: a_input_output_i128
5656
; GFX90A: bb.0 (%ir-block.0):
57-
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 8257546 /* regdef:AReg_128_Align2 */, def %11
57+
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 8650762 /* regdef:AReg_128_Align2 */, def %11
5858
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %11
59-
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 8257545 /* reguse:AReg_128_Align2 */, [[COPY]]
59+
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 8650761 /* reguse:AReg_128_Align2 */, [[COPY]]
6060
; GFX90A-NEXT: S_ENDPGM 0
6161
%val = call i128 asm sideeffect "; def $0", "=a"()
6262
call void asm sideeffect "; use $0", "a"(i128 %val)

llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
1212
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
1313
; REGALLOC-GFX908-NEXT: {{ $}}
1414
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:AGPR_32 */, undef %6:agpr_32
15-
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7405578 /* regdef:VReg_128 */, def %25
15+
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7798794 /* regdef:VReg_128 */, def %25
1616
; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %25
17-
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3670026 /* regdef:VReg_64 */, def %27
17+
; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3735562 /* regdef:VReg_64 */, def %27
1818
; REGALLOC-GFX908-NEXT: SI_SPILL_AV64_SAVE %27, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
1919
; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]]
2020
; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
@@ -37,9 +37,9 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
3737
; PEI-GFX908-NEXT: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
3838
; PEI-GFX908-NEXT: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
3939
; PEI-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:AGPR_32 */, undef renamable $agpr0
40-
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7405578 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
40+
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7798794 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
4141
; PEI-GFX908-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
42-
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3670026 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
42+
; PEI-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3735562 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
4343
; PEI-GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
4444
; PEI-GFX908-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
4545
; PEI-GFX908-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec
@@ -61,9 +61,9 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
6161
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
6262
; REGALLOC-GFX90A-NEXT: {{ $}}
6363
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:AGPR_32 */, undef %6:agpr_32
64-
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7602186 /* regdef:VReg_128_Align2 */, def %23
64+
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7995402 /* regdef:VReg_128_Align2 */, def %23
6565
; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %23
66-
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %21
66+
; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3997706 /* regdef:VReg_64_Align2 */, def %21
6767
; REGALLOC-GFX90A-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY %21
6868
; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64_align2, [[COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
6969
; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
@@ -80,9 +80,9 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
8080
; PEI-GFX90A-NEXT: liveins: $sgpr4_sgpr5
8181
; PEI-GFX90A-NEXT: {{ $}}
8282
; PEI-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2424841 /* reguse:AGPR_32 */, undef renamable $agpr0
83-
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7602186 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
83+
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7995402 /* regdef:VReg_128_Align2 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
8484
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
85-
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def renamable $vgpr2_vgpr3
85+
; PEI-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3997706 /* regdef:VReg_64_Align2 */, def renamable $vgpr2_vgpr3
8686
; PEI-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
8787
; PEI-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
8888
; PEI-GFX90A-NEXT: renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec

llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr-copy-from.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,17 @@ body: |
4141
; CHECK-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY $vgpr0_vgpr1
4242
; CHECK-NEXT: [[COPY2:%[0-9]+]]:av_64_align2 = COPY $vgpr2_vgpr3
4343
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:areg_128_align2 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (s128), addrspace 1)
44-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
45-
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_vgprcd_e64_:%[0-9]+]]:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
46-
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3866633 /* reguse:VReg_64_Align2 */, [[V_MFMA_F64_4X4X4F64_vgprcd_e64_]]
44+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[GLOBAL_LOAD_DWORDX4_]]
45+
; CHECK-NEXT: [[V_MFMA_F64_4X4X4F64_e64_:%[0-9]+]]:areg_64_align2 = V_MFMA_F64_4X4X4F64_e64 [[COPY1]], [[COPY2]], [[COPY3]].sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
46+
; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3801097 /* reguse:AV_64_Align2 */, [[V_MFMA_F64_4X4X4F64_e64_]]
4747
; CHECK-NEXT: SI_RETURN
4848
%0:vreg_64_align2 = COPY $vgpr4_vgpr5
4949
%1:av_64_align2 = COPY $vgpr0_vgpr1
5050
%2:av_64_align2 = COPY $vgpr2_vgpr3
5151
%3:areg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec :: (load (s128), addrspace 1)
5252
%4:vreg_128_align2 = COPY %3
5353
%5:vreg_64_align2 = V_MFMA_F64_4X4X4F64_vgprcd_e64 %1, %2, %4.sub0_sub1, 0, 0, 0, implicit $mode, implicit $exec
54-
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3866633 /* reguse:VReg_64_Align2 */, %5
54+
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3801097 /* reguse:VReg_64_Align2 */, %5
5555
SI_RETURN
5656
...
5757

0 commit comments

Comments
 (0)