11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2- ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2+ ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
3+ ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
34
45define amdgpu_ps void @v_interp_f32 (float inreg %i , float inreg %j , i32 inreg %m0 ) #0 {
5- ; GCN -LABEL: v_interp_f32:
6- ; GCN : ; %bb.0: ; %main_body
7- ; GCN -NEXT: s_mov_b32 s3, exec_lo
8- ; GCN -NEXT: s_wqm_b32 exec_lo, exec_lo
9- ; GCN -NEXT: s_mov_b32 m0, s2
10- ; GCN -NEXT: lds_param_load v0, attr0.y wait_vdst:15
11- ; GCN -NEXT: lds_param_load v1, attr1.x wait_vdst:15
12- ; GCN -NEXT: s_mov_b32 exec_lo, s3
13- ; GCN -NEXT: v_mov_b32_e32 v2, s0
14- ; GCN -NEXT: v_mov_b32_e32 v4, s1
15- ; GCN -NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
16- ; GCN -NEXT: v_interp_p10_f32 v3, v0, v2, v0 wait_exp:1
17- ; GCN -NEXT: v_interp_p10_f32 v2, v1, v2, v1 wait_exp:0
18- ; GCN -NEXT: v_interp_p2_f32 v5, v0, v4, v3 wait_exp:7
19- ; GCN -NEXT: s_delay_alu instid0(VALU_DEP_1)
20- ; GCN -NEXT: v_interp_p2_f32 v4, v1, v4, v5 wait_exp:7
21- ; GCN -NEXT: exp mrt0 v3, v2, v5, v4 done
22- ; GCN -NEXT: s_endpgm
6+ ; GFX11 -LABEL: v_interp_f32:
7+ ; GFX11 : ; %bb.0: ; %main_body
8+ ; GFX11 -NEXT: s_mov_b32 s3, exec_lo
9+ ; GFX11 -NEXT: s_wqm_b32 exec_lo, exec_lo
10+ ; GFX11 -NEXT: s_mov_b32 m0, s2
11+ ; GFX11 -NEXT: lds_param_load v0, attr0.y wait_vdst:15
12+ ; GFX11 -NEXT: lds_param_load v1, attr1.x wait_vdst:15
13+ ; GFX11 -NEXT: s_mov_b32 exec_lo, s3
14+ ; GFX11 -NEXT: v_mov_b32_e32 v2, s0
15+ ; GFX11 -NEXT: v_mov_b32_e32 v4, s1
16+ ; GFX11 -NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
17+ ; GFX11 -NEXT: v_interp_p10_f32 v3, v0, v2, v0 wait_exp:1
18+ ; GFX11 -NEXT: v_interp_p10_f32 v2, v1, v2, v1 wait_exp:0
19+ ; GFX11 -NEXT: v_interp_p2_f32 v5, v0, v4, v3 wait_exp:7
20+ ; GFX11 -NEXT: s_delay_alu instid0(VALU_DEP_1)
21+ ; GFX11 -NEXT: v_interp_p2_f32 v4, v1, v4, v5 wait_exp:7
22+ ; GFX11 -NEXT: exp mrt0 v3, v2, v5, v4 done
23+ ; GFX11 -NEXT: s_endpgm
2324main_body:
2425 %p0 = call float @llvm.amdgcn.lds.param.load (i32 1 , i32 0 , i32 %m0 )
2526 %p1 = call float @llvm.amdgcn.lds.param.load (i32 0 , i32 1 , i32 %m0 )
@@ -32,30 +33,30 @@ main_body:
3233}
3334
3435define amdgpu_ps void @v_interp_f32_many (float inreg %i , float inreg %j , i32 inreg %m0 ) #0 {
35- ; GCN -LABEL: v_interp_f32_many:
36- ; GCN : ; %bb.0: ; %main_body
37- ; GCN -NEXT: s_mov_b32 s3, exec_lo
38- ; GCN -NEXT: s_wqm_b32 exec_lo, exec_lo
39- ; GCN -NEXT: s_mov_b32 m0, s2
40- ; GCN -NEXT: lds_param_load v0, attr0.x wait_vdst:15
41- ; GCN -NEXT: lds_param_load v1, attr1.x wait_vdst:15
42- ; GCN -NEXT: lds_param_load v2, attr2.x wait_vdst:15
43- ; GCN -NEXT: lds_param_load v3, attr3.x wait_vdst:15
44- ; GCN -NEXT: s_mov_b32 exec_lo, s3
45- ; GCN -NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
46- ; GCN -NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
47- ; GCN -NEXT: v_interp_p10_f32 v6, v0, v4, v0 wait_exp:3
48- ; GCN -NEXT: v_interp_p10_f32 v7, v1, v4, v1 wait_exp:2
49- ; GCN -NEXT: v_interp_p10_f32 v8, v2, v4, v2 wait_exp:1
50- ; GCN -NEXT: v_interp_p10_f32 v4, v3, v4, v3 wait_exp:0
51- ; GCN -NEXT: v_interp_p2_f32 v6, v0, v5, v6 wait_exp:7
52- ; GCN -NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
53- ; GCN -NEXT: v_interp_p2_f32 v7, v1, v5, v7 wait_exp:7
54- ; GCN -NEXT: v_interp_p2_f32 v8, v2, v5, v8 wait_exp:7
55- ; GCN -NEXT: s_delay_alu instid0(VALU_DEP_4)
56- ; GCN -NEXT: v_interp_p2_f32 v4, v3, v5, v4 wait_exp:7
57- ; GCN -NEXT: exp mrt0 v6, v7, v8, v4 done
58- ; GCN -NEXT: s_endpgm
36+ ; GFX11 -LABEL: v_interp_f32_many:
37+ ; GFX11 : ; %bb.0: ; %main_body
38+ ; GFX11 -NEXT: s_mov_b32 s3, exec_lo
39+ ; GFX11 -NEXT: s_wqm_b32 exec_lo, exec_lo
40+ ; GFX11 -NEXT: s_mov_b32 m0, s2
41+ ; GFX11 -NEXT: lds_param_load v0, attr0.x wait_vdst:15
42+ ; GFX11 -NEXT: lds_param_load v1, attr1.x wait_vdst:15
43+ ; GFX11 -NEXT: lds_param_load v2, attr2.x wait_vdst:15
44+ ; GFX11 -NEXT: lds_param_load v3, attr3.x wait_vdst:15
45+ ; GFX11 -NEXT: s_mov_b32 exec_lo, s3
46+ ; GFX11 -NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
47+ ; GFX11 -NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
48+ ; GFX11 -NEXT: v_interp_p10_f32 v6, v0, v4, v0 wait_exp:3
49+ ; GFX11 -NEXT: v_interp_p10_f32 v7, v1, v4, v1 wait_exp:2
50+ ; GFX11 -NEXT: v_interp_p10_f32 v8, v2, v4, v2 wait_exp:1
51+ ; GFX11 -NEXT: v_interp_p10_f32 v4, v3, v4, v3 wait_exp:0
52+ ; GFX11 -NEXT: v_interp_p2_f32 v6, v0, v5, v6 wait_exp:7
53+ ; GFX11 -NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
54+ ; GFX11 -NEXT: v_interp_p2_f32 v7, v1, v5, v7 wait_exp:7
55+ ; GFX11 -NEXT: v_interp_p2_f32 v8, v2, v5, v8 wait_exp:7
56+ ; GFX11 -NEXT: s_delay_alu instid0(VALU_DEP_4)
57+ ; GFX11 -NEXT: v_interp_p2_f32 v4, v3, v5, v4 wait_exp:7
58+ ; GFX11 -NEXT: exp mrt0 v6, v7, v8, v4 done
59+ ; GFX11 -NEXT: s_endpgm
5960main_body:
6061 %p0 = call float @llvm.amdgcn.lds.param.load (i32 0 , i32 0 , i32 %m0 )
6162 %p1 = call float @llvm.amdgcn.lds.param.load (i32 0 , i32 1 , i32 %m0 )
@@ -74,30 +75,30 @@ main_body:
7475}
7576
7677define amdgpu_ps void @v_interp_f32_many_vm (ptr addrspace (1 ) %ptr , i32 inreg %m0 ) #0 {
77- ; GCN -LABEL: v_interp_f32_many_vm:
78- ; GCN : ; %bb.0: ; %main_body
79- ; GCN -NEXT: global_load_b64 v[0:1], v[0:1], off offset:4
80- ; GCN -NEXT: s_mov_b32 m0, s0
81- ; GCN -NEXT: s_mov_b32 s0, exec_lo
82- ; GCN -NEXT: s_wqm_b32 exec_lo, exec_lo
83- ; GCN -NEXT: lds_param_load v2, attr0.x wait_vdst:15
84- ; GCN -NEXT: lds_param_load v3, attr1.x wait_vdst:15
85- ; GCN -NEXT: lds_param_load v4, attr2.x wait_vdst:15
86- ; GCN -NEXT: lds_param_load v5, attr3.x wait_vdst:15
87- ; GCN -NEXT: s_mov_b32 exec_lo, s0
88- ; GCN -NEXT: s_waitcnt vmcnt(0)
89- ; GCN -NEXT: v_interp_p10_f32 v6, v2, v0, v2 wait_exp:3
90- ; GCN -NEXT: v_interp_p10_f32 v7, v3, v0, v3 wait_exp:2
91- ; GCN -NEXT: v_interp_p10_f32 v8, v4, v0, v4 wait_exp:1
92- ; GCN -NEXT: v_interp_p10_f32 v0, v5, v0, v5 wait_exp:0
93- ; GCN -NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
94- ; GCN -NEXT: v_interp_p2_f32 v6, v2, v1, v6 wait_exp:7
95- ; GCN -NEXT: v_interp_p2_f32 v7, v3, v1, v7 wait_exp:7
96- ; GCN -NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
97- ; GCN -NEXT: v_interp_p2_f32 v8, v4, v1, v8 wait_exp:7
98- ; GCN -NEXT: v_interp_p2_f32 v0, v5, v1, v0 wait_exp:7
99- ; GCN -NEXT: exp mrt0 v6, v7, v8, v0 done
100- ; GCN -NEXT: s_endpgm
78+ ; GFX11 -LABEL: v_interp_f32_many_vm:
79+ ; GFX11 : ; %bb.0: ; %main_body
80+ ; GFX11 -NEXT: global_load_b64 v[0:1], v[0:1], off offset:4
81+ ; GFX11 -NEXT: s_mov_b32 m0, s0
82+ ; GFX11 -NEXT: s_mov_b32 s0, exec_lo
83+ ; GFX11 -NEXT: s_wqm_b32 exec_lo, exec_lo
84+ ; GFX11 -NEXT: lds_param_load v2, attr0.x wait_vdst:15
85+ ; GFX11 -NEXT: lds_param_load v3, attr1.x wait_vdst:15
86+ ; GFX11 -NEXT: lds_param_load v4, attr2.x wait_vdst:15
87+ ; GFX11 -NEXT: lds_param_load v5, attr3.x wait_vdst:15
88+ ; GFX11 -NEXT: s_mov_b32 exec_lo, s0
89+ ; GFX11 -NEXT: s_waitcnt vmcnt(0)
90+ ; GFX11 -NEXT: v_interp_p10_f32 v6, v2, v0, v2 wait_exp:3
91+ ; GFX11 -NEXT: v_interp_p10_f32 v7, v3, v0, v3 wait_exp:2
92+ ; GFX11 -NEXT: v_interp_p10_f32 v8, v4, v0, v4 wait_exp:1
93+ ; GFX11 -NEXT: v_interp_p10_f32 v0, v5, v0, v5 wait_exp:0
94+ ; GFX11 -NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
95+ ; GFX11 -NEXT: v_interp_p2_f32 v6, v2, v1, v6 wait_exp:7
96+ ; GFX11 -NEXT: v_interp_p2_f32 v7, v3, v1, v7 wait_exp:7
97+ ; GFX11 -NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
98+ ; GFX11 -NEXT: v_interp_p2_f32 v8, v4, v1, v8 wait_exp:7
99+ ; GFX11 -NEXT: v_interp_p2_f32 v0, v5, v1, v0 wait_exp:7
100+ ; GFX11 -NEXT: exp mrt0 v6, v7, v8, v0 done
101+ ; GFX11 -NEXT: s_endpgm
101102main_body:
102103 %i.ptr = getelementptr float , ptr addrspace (1 ) %ptr , i32 1
103104 %i = load float , ptr addrspace (1 ) %i.ptr , align 4
@@ -120,23 +121,41 @@ main_body:
120121}
121122
122123define amdgpu_ps half @v_interp_f16 (float inreg %i , float inreg %j , i32 inreg %m0 ) #0 {
123- ; GCN-LABEL: v_interp_f16:
124- ; GCN: ; %bb.0: ; %main_body
125- ; GCN-NEXT: s_mov_b32 s3, exec_lo
126- ; GCN-NEXT: s_wqm_b32 exec_lo, exec_lo
127- ; GCN-NEXT: s_mov_b32 m0, s2
128- ; GCN-NEXT: lds_param_load v1, attr0.x wait_vdst:15
129- ; GCN-NEXT: s_mov_b32 exec_lo, s3
130- ; GCN-NEXT: v_mov_b32_e32 v0, s0
131- ; GCN-NEXT: v_mov_b32_e32 v2, s1
132- ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
133- ; GCN-NEXT: v_interp_p10_f16_f32 v3, v1, v0, v1 wait_exp:0
134- ; GCN-NEXT: v_interp_p10_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
135- ; GCN-NEXT: v_interp_p2_f16_f32 v3, v1, v2, v3 wait_exp:7
136- ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
137- ; GCN-NEXT: v_interp_p2_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
138- ; GCN-NEXT: v_add_f16_e32 v0, v3, v0
139- ; GCN-NEXT: ; return to shader part epilog
124+ ; GFX11-TRUE16-LABEL: v_interp_f16:
125+ ; GFX11-TRUE16: ; %bb.0: ; %main_body
126+ ; GFX11-TRUE16-NEXT: s_mov_b32 s3, exec_lo
127+ ; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
128+ ; GFX11-TRUE16-NEXT: s_mov_b32 m0, s2
129+ ; GFX11-TRUE16-NEXT: lds_param_load v1, attr0.x wait_vdst:15
130+ ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s3
131+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, s0
132+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s1
133+ ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
134+ ; GFX11-TRUE16-NEXT: v_interp_p10_f16_f32 v3, v1.l, v0, v1.l wait_exp:0
135+ ; GFX11-TRUE16-NEXT: v_interp_p10_f16_f32 v4, v1.h, v0, v1.h wait_exp:7
136+ ; GFX11-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7
137+ ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
138+ ; GFX11-TRUE16-NEXT: v_interp_p2_f16_f32 v0.h, v1.h, v2, v4 wait_exp:7
139+ ; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
140+ ; GFX11-TRUE16-NEXT: ; return to shader part epilog
141+ ;
142+ ; GFX11-FAKE16-LABEL: v_interp_f16:
143+ ; GFX11-FAKE16: ; %bb.0: ; %main_body
144+ ; GFX11-FAKE16-NEXT: s_mov_b32 s3, exec_lo
145+ ; GFX11-FAKE16-NEXT: s_wqm_b32 exec_lo, exec_lo
146+ ; GFX11-FAKE16-NEXT: s_mov_b32 m0, s2
147+ ; GFX11-FAKE16-NEXT: lds_param_load v1, attr0.x wait_vdst:15
148+ ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s3
149+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s0
150+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, s1
151+ ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
152+ ; GFX11-FAKE16-NEXT: v_interp_p10_f16_f32 v3, v1, v0, v1 wait_exp:0
153+ ; GFX11-FAKE16-NEXT: v_interp_p10_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
154+ ; GFX11-FAKE16-NEXT: v_interp_p2_f16_f32 v3, v1, v2, v3 wait_exp:7
155+ ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
156+ ; GFX11-FAKE16-NEXT: v_interp_p2_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
157+ ; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, v3, v0
158+ ; GFX11-FAKE16-NEXT: ; return to shader part epilog
140159main_body:
141160 %p0 = call float @llvm.amdgcn.lds.param.load (i32 0 , i32 0 , i32 %m0 )
142161 %l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16 (float %p0 , float %i , float %p0 , i1 0 )
@@ -148,23 +167,41 @@ main_body:
148167}
149168
150169define amdgpu_ps half @v_interp_rtz_f16 (float inreg %i , float inreg %j , i32 inreg %m0 ) #0 {
151- ; GCN-LABEL: v_interp_rtz_f16:
152- ; GCN: ; %bb.0: ; %main_body
153- ; GCN-NEXT: s_mov_b32 s3, exec_lo
154- ; GCN-NEXT: s_wqm_b32 exec_lo, exec_lo
155- ; GCN-NEXT: s_mov_b32 m0, s2
156- ; GCN-NEXT: lds_param_load v1, attr0.x wait_vdst:15
157- ; GCN-NEXT: s_mov_b32 exec_lo, s3
158- ; GCN-NEXT: v_mov_b32_e32 v0, s0
159- ; GCN-NEXT: v_mov_b32_e32 v2, s1
160- ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
161- ; GCN-NEXT: v_interp_p10_rtz_f16_f32 v3, v1, v0, v1 wait_exp:0
162- ; GCN-NEXT: v_interp_p10_rtz_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
163- ; GCN-NEXT: v_interp_p2_rtz_f16_f32 v3, v1, v2, v3 wait_exp:7
164- ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
165- ; GCN-NEXT: v_interp_p2_rtz_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
166- ; GCN-NEXT: v_add_f16_e32 v0, v3, v0
167- ; GCN-NEXT: ; return to shader part epilog
170+ ; GFX11-TRUE16-LABEL: v_interp_rtz_f16:
171+ ; GFX11-TRUE16: ; %bb.0: ; %main_body
172+ ; GFX11-TRUE16-NEXT: s_mov_b32 s3, exec_lo
173+ ; GFX11-TRUE16-NEXT: s_wqm_b32 exec_lo, exec_lo
174+ ; GFX11-TRUE16-NEXT: s_mov_b32 m0, s2
175+ ; GFX11-TRUE16-NEXT: lds_param_load v1, attr0.x wait_vdst:15
176+ ; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s3
177+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, s0
178+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, s1
179+ ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
180+ ; GFX11-TRUE16-NEXT: v_interp_p10_rtz_f16_f32 v3, v1.l, v0, v1.l wait_exp:0
181+ ; GFX11-TRUE16-NEXT: v_interp_p10_rtz_f16_f32 v4, v1.h, v0, v1.h wait_exp:7
182+ ; GFX11-TRUE16-NEXT: v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7
183+ ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
184+ ; GFX11-TRUE16-NEXT: v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v4 wait_exp:7
185+ ; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
186+ ; GFX11-TRUE16-NEXT: ; return to shader part epilog
187+ ;
188+ ; GFX11-FAKE16-LABEL: v_interp_rtz_f16:
189+ ; GFX11-FAKE16: ; %bb.0: ; %main_body
190+ ; GFX11-FAKE16-NEXT: s_mov_b32 s3, exec_lo
191+ ; GFX11-FAKE16-NEXT: s_wqm_b32 exec_lo, exec_lo
192+ ; GFX11-FAKE16-NEXT: s_mov_b32 m0, s2
193+ ; GFX11-FAKE16-NEXT: lds_param_load v1, attr0.x wait_vdst:15
194+ ; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s3
195+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, s0
196+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, s1
197+ ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
198+ ; GFX11-FAKE16-NEXT: v_interp_p10_rtz_f16_f32 v3, v1, v0, v1 wait_exp:0
199+ ; GFX11-FAKE16-NEXT: v_interp_p10_rtz_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
200+ ; GFX11-FAKE16-NEXT: v_interp_p2_rtz_f16_f32 v3, v1, v2, v3 wait_exp:7
201+ ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
202+ ; GFX11-FAKE16-NEXT: v_interp_p2_rtz_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
203+ ; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, v3, v0
204+ ; GFX11-FAKE16-NEXT: ; return to shader part epilog
168205main_body:
169206 %p0 = call float @llvm.amdgcn.lds.param.load (i32 0 , i32 0 , i32 %m0 )
170207 %l_p0 = call float @llvm.amdgcn.interp.p10.rtz.f16 (float %p0 , float %i , float %p0 , i1 0 )
@@ -176,17 +213,30 @@ main_body:
176213}
177214
178215define amdgpu_ps half @v_interp_f16_imm_params (float inreg %i , float inreg %j ) #0 {
179- ; GCN-LABEL: v_interp_f16_imm_params:
180- ; GCN: ; %bb.0: ; %main_body
181- ; GCN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
182- ; GCN-NEXT: v_mov_b32_e32 v2, s1
183- ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
184- ; GCN-NEXT: v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7
185- ; GCN-NEXT: v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7
186- ; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
187- ; GCN-NEXT: v_cvt_f16_f32_e32 v1, v1
188- ; GCN-NEXT: v_add_f16_e32 v0, v1, v0
189- ; GCN-NEXT: ; return to shader part epilog
216+ ; GFX11-TRUE16-LABEL: v_interp_f16_imm_params:
217+ ; GFX11-TRUE16: ; %bb.0: ; %main_body
218+ ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, 0
219+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0
220+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, s1
221+ ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
222+ ; GFX11-TRUE16-NEXT: v_interp_p10_f16_f32 v1, v0.l, v1, v0.l wait_exp:7
223+ ; GFX11-TRUE16-NEXT: v_interp_p2_f16_f32 v0.l, v0.l, v3, v2 wait_exp:7
224+ ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
225+ ; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
226+ ; GFX11-TRUE16-NEXT: v_add_f16_e32 v0.l, v0.h, v0.l
227+ ; GFX11-TRUE16-NEXT: ; return to shader part epilog
228+ ;
229+ ; GFX11-FAKE16-LABEL: v_interp_f16_imm_params:
230+ ; GFX11-FAKE16: ; %bb.0: ; %main_body
231+ ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
232+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, s1
233+ ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
234+ ; GFX11-FAKE16-NEXT: v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7
235+ ; GFX11-FAKE16-NEXT: v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7
236+ ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
237+ ; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
238+ ; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, v1, v0
239+ ; GFX11-FAKE16-NEXT: ; return to shader part epilog
190240main_body:
191241 %l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16 (float 0 .0 , float %i , float 0 .0 , i1 0 )
192242 %l_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16 (float 0 .0 , float %j , float 0 .0 , i1 0 )
0 commit comments