Skip to content

Commit 7dfd5ba

Browse files
authored
[AMDGPU] Update codegen tests for PR #154069 (#154862)
1 parent 37664cd commit 7dfd5ba

File tree

4 files changed

+723
-193
lines changed

4 files changed

+723
-193
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,21 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel %s -o - 2>&1 | FileCheck %s
23

34
; This file checks that the translation from LLVM IR to generic
45
; MachineInstr is correct.
56

67
; Tests for add.
7-
; CHECK: name: addi32
8-
; CHECK: {{%[0-9]+}}:_(s32) = G_ADD
9-
define amdgpu_kernel void @addi32(i32 %arg1, i32 %arg2) {
8+
define void @addi32(i32 %arg1, i32 %arg2) {
9+
; CHECK-LABEL: name: addi32
10+
; CHECK: bb.1 (%ir-block.0):
11+
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
14+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
15+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
16+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
17+
; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
18+
; CHECK-NEXT: SI_RETURN
1019
%res = add i32 %arg1, %arg2
1120
store i32 %res, ptr addrspace(1) poison
1221
ret void

llvm/test/CodeGen/AMDGPU/add_i64.ll

Lines changed: 106 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,29 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI %s
23

3-
4-
declare i32 @llvm.amdgcn.workitem.id.x() readnone
5-
6-
; SI-LABEL: {{^}}test_i64_vreg:
7-
; SI: v_add_i32
8-
; SI: v_addc_u32
94
define amdgpu_kernel void @test_i64_vreg(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) {
5+
; SI-LABEL: test_i64_vreg:
6+
; SI: ; %bb.0:
7+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
8+
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
9+
; SI-NEXT: s_mov_b32 s11, 0xf000
10+
; SI-NEXT: s_mov_b32 s14, 0
11+
; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
12+
; SI-NEXT: v_mov_b32_e32 v1, 0
13+
; SI-NEXT: s_mov_b32 s15, s11
14+
; SI-NEXT: s_mov_b64 s[6:7], s[14:15]
15+
; SI-NEXT: s_waitcnt lgkmcnt(0)
16+
; SI-NEXT: s_mov_b64 s[12:13], s[2:3]
17+
; SI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[12:15], 0 addr64
18+
; SI-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
19+
; SI-NEXT: s_mov_b32 s10, -1
20+
; SI-NEXT: s_mov_b32 s8, s0
21+
; SI-NEXT: s_mov_b32 s9, s1
22+
; SI-NEXT: s_waitcnt vmcnt(0)
23+
; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0
24+
; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
25+
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
26+
; SI-NEXT: s_endpgm
1027
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
1128
%a_ptr = getelementptr i64, ptr addrspace(1) %inA, i32 %tid
1229
%b_ptr = getelementptr i64, ptr addrspace(1) %inB, i32 %tid
@@ -18,10 +35,22 @@ define amdgpu_kernel void @test_i64_vreg(ptr addrspace(1) noalias %out, ptr addr
1835
}
1936

2037
; Check that the SGPR add operand is correctly moved to a VGPR.
21-
; SI-LABEL: {{^}}sgpr_operand:
22-
; SI: s_add_u32
23-
; SI: s_addc_u32
2438
define amdgpu_kernel void @sgpr_operand(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %in_bar, i64 %a) {
39+
; SI-LABEL: sgpr_operand:
40+
; SI: ; %bb.0:
41+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
42+
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xf
43+
; SI-NEXT: s_waitcnt lgkmcnt(0)
44+
; SI-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x0
45+
; SI-NEXT: s_mov_b32 s3, 0xf000
46+
; SI-NEXT: s_waitcnt lgkmcnt(0)
47+
; SI-NEXT: s_add_u32 s4, s6, s4
48+
; SI-NEXT: s_addc_u32 s5, s7, s5
49+
; SI-NEXT: s_mov_b32 s2, -1
50+
; SI-NEXT: v_mov_b32_e32 v0, s4
51+
; SI-NEXT: v_mov_b32_e32 v1, s5
52+
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
53+
; SI-NEXT: s_endpgm
2554
%foo = load i64, ptr addrspace(1) %in, align 8
2655
%result = add i64 %foo, %a
2756
store i64 %result, ptr addrspace(1) %out
@@ -30,35 +59,76 @@ define amdgpu_kernel void @sgpr_operand(ptr addrspace(1) noalias %out, ptr addrs
3059

3160
; Swap the arguments. Check that the SGPR -> VGPR copy works with the
3261
; SGPR as other operand.
33-
;
34-
; SI-LABEL: {{^}}sgpr_operand_reversed:
35-
; SI: s_add_u32
36-
; SI: s_addc_u32
3762
; Kernel under test: loads an i64 from %in, computes %a + %foo with the kernel
; argument %a as the left-hand operand, and stores the sum to %out.
; The SI assertions in the body expect the add to be done as a scalar
; s_add_u32/s_addc_u32 pair, with the two result dwords then copied into v0/v1
; for the buffer_store_dwordx2.
define amdgpu_kernel void @sgpr_operand_reversed(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i64 %a) {
63+
; SI-LABEL: sgpr_operand_reversed:
64+
; SI: ; %bb.0:
65+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
66+
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
67+
; SI-NEXT: s_waitcnt lgkmcnt(0)
68+
; SI-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x0
69+
; SI-NEXT: s_mov_b32 s3, 0xf000
70+
; SI-NEXT: s_waitcnt lgkmcnt(0)
71+
; SI-NEXT: s_add_u32 s4, s4, s6
72+
; SI-NEXT: s_addc_u32 s5, s5, s7
73+
; SI-NEXT: s_mov_b32 s2, -1
74+
; SI-NEXT: v_mov_b32_e32 v0, s4
75+
; SI-NEXT: v_mov_b32_e32 v1, s5
76+
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
77+
; SI-NEXT: s_endpgm
3878
%foo = load i64, ptr addrspace(1) %in, align 8
3979
%result = add i64 %a, %foo
4080
store i64 %result, ptr addrspace(1) %out
4181
ret void
4282
}
4383

44-
45-
; SI-LABEL: {{^}}test_v2i64_sreg:
46-
; SI: s_add_u32
47-
; SI: s_addc_u32
48-
; SI: s_add_u32
49-
; SI: s_addc_u32
5084
; Kernel under test: adds the two <2 x i64> kernel arguments %a and %b and
; stores the vector sum to %out.
; The SI assertions in the body expect one scalar s_add_u32/s_addc_u32 pair per
; 64-bit element, with the four result dwords then copied into v0-v3 for a
; single buffer_store_dwordx4.
define amdgpu_kernel void @test_v2i64_sreg(ptr addrspace(1) noalias %out, <2 x i64> %a, <2 x i64> %b) {
85+
; SI-LABEL: test_v2i64_sreg:
86+
; SI: ; %bb.0:
87+
; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0xd
88+
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
89+
; SI-NEXT: s_mov_b32 s3, 0xf000
90+
; SI-NEXT: s_waitcnt lgkmcnt(0)
91+
; SI-NEXT: s_add_u32 s4, s10, s14
92+
; SI-NEXT: s_addc_u32 s5, s11, s15
93+
; SI-NEXT: s_add_u32 s6, s8, s12
94+
; SI-NEXT: s_addc_u32 s7, s9, s13
95+
; SI-NEXT: s_mov_b32 s2, -1
96+
; SI-NEXT: v_mov_b32_e32 v2, s4
97+
; SI-NEXT: v_mov_b32_e32 v3, s5
98+
; SI-NEXT: v_mov_b32_e32 v0, s6
99+
; SI-NEXT: v_mov_b32_e32 v1, s7
100+
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
101+
; SI-NEXT: s_endpgm
51102
%result = add <2 x i64> %a, %b
52103
store <2 x i64> %result, ptr addrspace(1) %out
53104
ret void
54105
}
55106

56-
; SI-LABEL: {{^}}test_v2i64_vreg:
57-
; SI: v_add_i32
58-
; SI: v_addc_u32
59-
; SI: v_add_i32
60-
; SI: v_addc_u32
61107
define amdgpu_kernel void @test_v2i64_vreg(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) {
108+
; SI-LABEL: test_v2i64_vreg:
109+
; SI: ; %bb.0:
110+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
111+
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
112+
; SI-NEXT: s_mov_b32 s11, 0xf000
113+
; SI-NEXT: s_mov_b32 s14, 0
114+
; SI-NEXT: v_lshlrev_b32_e32 v4, 4, v0
115+
; SI-NEXT: v_mov_b32_e32 v5, 0
116+
; SI-NEXT: s_mov_b32 s15, s11
117+
; SI-NEXT: s_mov_b64 s[6:7], s[14:15]
118+
; SI-NEXT: s_waitcnt lgkmcnt(0)
119+
; SI-NEXT: s_mov_b64 s[12:13], s[2:3]
120+
; SI-NEXT: buffer_load_dwordx4 v[0:3], v[4:5], s[12:15], 0 addr64
121+
; SI-NEXT: buffer_load_dwordx4 v[4:7], v[4:5], s[4:7], 0 addr64
122+
; SI-NEXT: s_mov_b32 s10, -1
123+
; SI-NEXT: s_mov_b32 s8, s0
124+
; SI-NEXT: s_mov_b32 s9, s1
125+
; SI-NEXT: s_waitcnt vmcnt(0)
126+
; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v6
127+
; SI-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
128+
; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v4
129+
; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
130+
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
131+
; SI-NEXT: s_endpgm
62132
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
63133
%a_ptr = getelementptr <2 x i64>, ptr addrspace(1) %inA, i32 %tid
64134
%b_ptr = getelementptr <2 x i64>, ptr addrspace(1) %inB, i32 %tid
@@ -69,14 +139,19 @@ define amdgpu_kernel void @test_v2i64_vreg(ptr addrspace(1) noalias %out, ptr ad
69139
ret void
70140
}
71141

72-
; SI-LABEL: {{^}}trunc_i64_add_to_i32:
73-
; SI: s_load_dword s[[SREG0:[0-9]+]]
74-
; SI: s_load_dword s[[SREG1:[0-9]+]]
75-
; SI: s_add_i32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
76-
; SI-NOT: addc
77-
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
78-
; SI: buffer_store_dword [[VRESULT]],
79142
define amdgpu_kernel void @trunc_i64_add_to_i32(ptr addrspace(1) %out, i32, i64 %a, i32, i64 %b) {
143+
; SI-LABEL: trunc_i64_add_to_i32:
144+
; SI: ; %bb.0:
145+
; SI-NEXT: s_load_dword s2, s[4:5], 0xd
146+
; SI-NEXT: s_load_dword s6, s[4:5], 0x11
147+
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
148+
; SI-NEXT: s_mov_b32 s3, 0xf000
149+
; SI-NEXT: s_waitcnt lgkmcnt(0)
150+
; SI-NEXT: s_add_i32 s4, s6, s2
151+
; SI-NEXT: s_mov_b32 s2, -1
152+
; SI-NEXT: v_mov_b32_e32 v0, s4
153+
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
154+
; SI-NEXT: s_endpgm
80155
%add = add i64 %b, %a
81156
%trunc = trunc i64 %add to i32
82157
store i32 %trunc, ptr addrspace(1) %out, align 8

0 commit comments

Comments
 (0)