1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
23
3- ; GCN-LABEL: {{^}}test_i128_vreg:
4- ; GCN: v_add_i32_e32 v[[LO:[0-9]+]], vcc,
5- ; GCN-NEXT: v_addc_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
6- ; GCN-NEXT: v_addc_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
7- ; GCN-NEXT: v_addc_u32_e32 v[[HI:[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
8- ; GCN: buffer_store_dwordx4 v[[[LO]]:[[HI]]],
94define amdgpu_kernel void @test_i128_vreg (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %inA , ptr addrspace (1 ) noalias %inB ) {
5+ ; GCN-LABEL: test_i128_vreg:
6+ ; GCN: ; %bb.0:
7+ ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
8+ ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
9+ ; GCN-NEXT: s_mov_b32 s11, 0xf000
10+ ; GCN-NEXT: s_mov_b32 s14, 0
11+ ; GCN-NEXT: v_lshlrev_b32_e32 v4, 4, v0
12+ ; GCN-NEXT: v_mov_b32_e32 v5, 0
13+ ; GCN-NEXT: s_mov_b32 s15, s11
14+ ; GCN-NEXT: s_mov_b64 s[6:7], s[14:15]
15+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
16+ ; GCN-NEXT: s_mov_b64 s[12:13], s[2:3]
17+ ; GCN-NEXT: buffer_load_dwordx4 v[0:3], v[4:5], s[12:15], 0 addr64
18+ ; GCN-NEXT: buffer_load_dwordx4 v[4:7], v[4:5], s[4:7], 0 addr64
19+ ; GCN-NEXT: s_mov_b32 s10, -1
20+ ; GCN-NEXT: s_mov_b32 s8, s0
21+ ; GCN-NEXT: s_mov_b32 s9, s1
22+ ; GCN-NEXT: s_waitcnt vmcnt(0)
23+ ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v4
24+ ; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
25+ ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v2, v6, vcc
26+ ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
27+ ; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
28+ ; GCN-NEXT: s_endpgm
1029 %tid = call i32 @llvm.amdgcn.workitem.id.x () readnone
1130 %a_ptr = getelementptr i128 , ptr addrspace (1 ) %inA , i32 %tid
1231 %b_ptr = getelementptr i128 , ptr addrspace (1 ) %inB , i32 %tid
@@ -18,36 +37,74 @@ define amdgpu_kernel void @test_i128_vreg(ptr addrspace(1) noalias %out, ptr add
1837}
1938
2039; Check that the SGPR add operand is correctly moved to a VGPR.
21- ; GCN-LABEL: {{^}}sgpr_operand:
22- ; GCN: s_add_u32
23- ; GCN: s_addc_u32
24- ; GCN: s_addc_u32
25- ; GCN: s_addc_u32
; sgpr_operand: loads an i128 from %in, adds the i128 kernel argument %a to it
; (the loaded value is the first add operand), and stores the sum to %out.
; The assertions inside the body expect the 128-bit add to be emitted as one
; s_add_u32 followed by three s_addc_u32, with the four result dwords then
; copied into v[0:3] for a single buffer_store_dwordx4.
2640define amdgpu_kernel void @sgpr_operand (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in , i128 %a ) {
41+ ; GCN-LABEL: sgpr_operand:
42+ ; GCN: ; %bb.0:
43+ ; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
44+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
45+ ; GCN-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
46+ ; GCN-NEXT: s_mov_b32 s3, 0xf000
47+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
48+ ; GCN-NEXT: s_add_u32 s4, s8, s4
49+ ; GCN-NEXT: s_addc_u32 s5, s9, s5
50+ ; GCN-NEXT: s_addc_u32 s6, s10, s6
51+ ; GCN-NEXT: s_addc_u32 s7, s11, s7
52+ ; GCN-NEXT: s_mov_b32 s2, -1
53+ ; GCN-NEXT: v_mov_b32_e32 v0, s4
54+ ; GCN-NEXT: v_mov_b32_e32 v1, s5
55+ ; GCN-NEXT: v_mov_b32_e32 v2, s6
56+ ; GCN-NEXT: v_mov_b32_e32 v3, s7
57+ ; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
58+ ; GCN-NEXT: s_endpgm
; IR under test: %result = %foo + %a, where %foo is the i128 loaded from %in.
2759 %foo = load i128 , ptr addrspace (1 ) %in , align 8
2860 %result = add i128 %foo , %a
2961 store i128 %result , ptr addrspace (1 ) %out
3062 ret void
3163}
3264
33- ; GCN-LABEL: {{^}}sgpr_operand_reversed:
34- ; GCN: s_add_u32
35- ; GCN: s_addc_u32
36- ; GCN: s_addc_u32
37- ; GCN: s_addc_u32
; sgpr_operand_reversed: loads an i128 from %in and adds it to the i128 kernel
; argument %a, with %a as the first add operand, then stores the sum to %out.
; The assertions inside the body expect one s_add_u32 followed by three
; s_addc_u32 whose first source is s4..s7 and whose second source is s8..s11
; (the dwords produced by the s_load_dwordx4), with the result copied into
; v[0:3] for a single buffer_store_dwordx4.
3865define amdgpu_kernel void @sgpr_operand_reversed (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in , i128 %a ) {
66+ ; GCN-LABEL: sgpr_operand_reversed:
67+ ; GCN: ; %bb.0:
68+ ; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
69+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
70+ ; GCN-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
71+ ; GCN-NEXT: s_mov_b32 s3, 0xf000
72+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
73+ ; GCN-NEXT: s_add_u32 s4, s4, s8
74+ ; GCN-NEXT: s_addc_u32 s5, s5, s9
75+ ; GCN-NEXT: s_addc_u32 s6, s6, s10
76+ ; GCN-NEXT: s_addc_u32 s7, s7, s11
77+ ; GCN-NEXT: s_mov_b32 s2, -1
78+ ; GCN-NEXT: v_mov_b32_e32 v0, s4
79+ ; GCN-NEXT: v_mov_b32_e32 v1, s5
80+ ; GCN-NEXT: v_mov_b32_e32 v2, s6
81+ ; GCN-NEXT: v_mov_b32_e32 v3, s7
82+ ; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
83+ ; GCN-NEXT: s_endpgm
; IR under test: %result = %a + %foo, where %foo is the i128 loaded from %in.
3984 %foo = load i128 , ptr addrspace (1 ) %in , align 8
4085 %result = add i128 %a , %foo
4186 store i128 %result , ptr addrspace (1 ) %out
4287 ret void
4388}
4489
45- ; GCN-LABEL: {{^}}test_sreg:
46- ; GCN: s_add_u32
47- ; GCN: s_addc_u32
48- ; GCN: s_addc_u32
49- ; GCN: s_addc_u32
5090define amdgpu_kernel void @test_sreg (ptr addrspace (1 ) noalias %out , i128 %a , i128 %b ) {
91+ ; GCN-LABEL: test_sreg:
92+ ; GCN: ; %bb.0:
93+ ; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0xb
94+ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
95+ ; GCN-NEXT: s_mov_b32 s3, 0xf000
96+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
97+ ; GCN-NEXT: s_add_u32 s4, s8, s12
98+ ; GCN-NEXT: s_addc_u32 s5, s9, s13
99+ ; GCN-NEXT: s_addc_u32 s6, s10, s14
100+ ; GCN-NEXT: s_addc_u32 s7, s11, s15
101+ ; GCN-NEXT: s_mov_b32 s2, -1
102+ ; GCN-NEXT: v_mov_b32_e32 v0, s4
103+ ; GCN-NEXT: v_mov_b32_e32 v1, s5
104+ ; GCN-NEXT: v_mov_b32_e32 v2, s6
105+ ; GCN-NEXT: v_mov_b32_e32 v3, s7
106+ ; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
107+ ; GCN-NEXT: s_endpgm
51108 %result = add i128 %a , %b
52109 store i128 %result , ptr addrspace (1 ) %out
53110 ret void
0 commit comments