Skip to content

Commit 4d4f883

Browse files
add more test
1 parent b1575e5 commit 4d4f883

File tree

1 file changed

+24
-57
lines changed

1 file changed

+24
-57
lines changed

llvm/test/CodeGen/AMDGPU/uniform-vgpr-to-sgpr-return.ll

Lines changed: 24 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -73,51 +73,6 @@ define amdgpu_ps ptr @uniform_v_to_s_ptr(ptr inreg %x) {
7373
ret ptr %ptr
7474
}
7575

76-
define amdgpu_ps half @uniform_v_to_s_f16(half inreg %a, half inreg %b) {
77-
; GFX11-LABEL: uniform_v_to_s_f16:
78-
; GFX11: ; %bb.0:
79-
; GFX11-NEXT: v_max_f16_e64 v0, s0, s1
80-
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
81-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
82-
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
83-
; GFX11-NEXT: ; return to shader part epilog
84-
%max = call half @llvm.maximum.f16(half %a, half %b)
85-
ret half %max
86-
}
87-
88-
define amdgpu_ps float @uniform_v_to_s_v2f16(<2 x half> inreg %a, <2 x half> inreg %b) {
89-
; GFX11-LABEL: uniform_v_to_s_v2f16:
90-
; GFX11: ; %bb.0:
91-
; GFX11-NEXT: v_pk_max_f16 v0, s0, s1
92-
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
93-
; GFX11-NEXT: s_lshr_b32 s2, s1, 16
94-
; GFX11-NEXT: s_lshr_b32 s0, s0, 16
95-
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
96-
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
97-
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s2
98-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
99-
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
100-
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
101-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
102-
; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
103-
; GFX11-NEXT: ; return to shader part epilog
104-
%max = call <2 x half> @llvm.maximum.f16(<2 x half> %a, <2 x half> %b)
105-
%cast = bitcast <2 x half> %max to float
106-
ret float %cast
107-
}
108-
109-
define amdgpu_ps float @uniform_v_s_float(i32 inreg %a, i32 inreg %b) {
110-
; GFX11-LABEL: uniform_v_s_float:
111-
; GFX11: ; %bb.0:
112-
; GFX11-NEXT: s_and_b32 s0, s0, s1
113-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
114-
; GFX11-NEXT: v_mov_b32_e32 v0, s0
115-
; GFX11-NEXT: ; return to shader part epilog
116-
%and = and i32 %a, %b
117-
%cast = bitcast i32 %and to float
118-
ret float %cast
119-
}
120-
12176
define amdgpu_ps double @uniform_v_to_s_double(double inreg %a, double inreg %b) {
12277
; GFX11-LABEL: uniform_v_to_s_double:
12378
; GFX11: ; %bb.0:
@@ -134,18 +89,6 @@ define amdgpu_ps double @uniform_v_to_s_double(double inreg %a, double inreg %b)
13489
ret double %max0
13590
}
13691

137-
define amdgpu_ps float @uniform_v_to_s_f32(float inreg %a, float inreg %b) {
138-
; GFX11-LABEL: uniform_v_to_s_f32:
139-
; GFX11: ; %bb.0:
140-
; GFX11-NEXT: v_max_f32_e64 v0, s0, s1
141-
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s1
142-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
143-
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
144-
; GFX11-NEXT: ; return to shader part epilog
145-
%max0 = call float @llvm.maximum.f32(float %a, float %b)
146-
ret float %max0
147-
}
148-
14992
define amdgpu_ps <2 x i16> @uniform_v_to_s_2_i16(float inreg %a, float inreg %b) {
15093
; GFX11-LABEL: uniform_v_to_s_2_i16:
15194
; GFX11: ; %bb.0:
@@ -173,3 +116,27 @@ define amdgpu_ps i16 @uniform_v_to_s_i16(half inreg %a, half inreg %b) {
173116
%cast = bitcast half %max to i16
174117
ret i16 %cast
175118
}
119+
120+
define amdgpu_ps half @uniform_add_i16_cast_to_f16(i16 inreg %a, i16 inreg %b) {
121+
; GFX11-LABEL: uniform_add_i16_cast_to_f16:
122+
; GFX11: ; %bb.0:
123+
; GFX11-NEXT: s_add_i32 s0, s0, s1
124+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
125+
; GFX11-NEXT: v_mov_b32_e32 v0, s0
126+
; GFX11-NEXT: ; return to shader part epilog
127+
%add = add i16 %a, %b
128+
%cast = bitcast i16 %add to half
129+
ret half %cast
130+
}
131+
132+
define amdgpu_ps float @uniform_mul_i32_cast_to_float(i32 inreg %a, i32 inreg %b) {
133+
; GFX11-LABEL: uniform_mul_i32_cast_to_float:
134+
; GFX11: ; %bb.0:
135+
; GFX11-NEXT: s_mul_i32 s0, s0, s1
136+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
137+
; GFX11-NEXT: v_mov_b32_e32 v0, s0
138+
; GFX11-NEXT: ; return to shader part epilog
139+
%mul = mul i32 %a, %b
140+
%cast = bitcast i32 %mul to float
141+
ret float %cast
142+
}

0 commit comments

Comments
 (0)