@@ -73,51 +73,6 @@ define amdgpu_ps ptr @uniform_v_to_s_ptr(ptr inreg %x) {
7373 ret ptr %ptr
7474}
7575
; uniform_v_to_s_f16: amdgpu_ps function that takes two `half inreg` arguments
; and returns the result of llvm.maximum.f16 on them.
; The GFX11 checks expect the operands in s0/s1, a v_max_f16_e64 into v0, and a
; v_cmp_o_f16_e64 whose vcc_lo result drives a v_cndmask_b32 choosing between
; the literal 0x7e00 and the max result.
; NOTE(review): 0x7e00 is presumably the f16 quiet-NaN encoding -- confirm.
76- define amdgpu_ps half @uniform_v_to_s_f16 (half inreg %a , half inreg %b ) {
77- ; GFX11-LABEL: uniform_v_to_s_f16:
78- ; GFX11: ; %bb.0:
79- ; GFX11-NEXT: v_max_f16_e64 v0, s0, s1
80- ; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
81- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
82- ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
83- ; GFX11-NEXT: ; return to shader part epilog
84- %max = call half @llvm.maximum.f16 (half %a , half %b )
85- ret half %max
86- }
87-
; uniform_v_to_s_v2f16: amdgpu_ps function that takes two `<2 x half> inreg`
; arguments, applies llvm.maximum element-wise, and returns the packed result
; bitcast to float.
; The intrinsic is spelled with the .v2f16 overload suffix so that its name
; matches the <2 x half> operand type used in the call.
; The GFX11 checks expect one v_pk_max_f16 on s0/s1, then two v_cmp_o_f16_e64
; compares (one on s0/s1, one on the values produced by the two s_lshr_b32 by
; 16), each driving a v_cndmask_b32 against the literal 0x7e00; the two halves
; are recombined with v_and_b32 0xffff and v_lshl_or_b32.
; NOTE(review): 0x7e00 is presumably the f16 quiet-NaN encoding -- confirm.
88- define amdgpu_ps float @uniform_v_to_s_v2f16 (<2 x half > inreg %a , <2 x half > inreg %b ) {
89- ; GFX11-LABEL: uniform_v_to_s_v2f16:
90- ; GFX11: ; %bb.0:
91- ; GFX11-NEXT: v_pk_max_f16 v0, s0, s1
92- ; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
93- ; GFX11-NEXT: s_lshr_b32 s2, s1, 16
94- ; GFX11-NEXT: s_lshr_b32 s0, s0, 16
95- ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
96- ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
97- ; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s2
98- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
99- ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
100- ; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
101- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
102- ; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
103- ; GFX11-NEXT: ; return to shader part epilog
104- %max = call <2 x half > @llvm.maximum.v2f16 (<2 x half > %a , <2 x half > %b )
105- %cast = bitcast <2 x half > %max to float
106- ret float %cast
107- }
108-
; uniform_v_s_float: amdgpu_ps function that ANDs two `i32 inreg` arguments and
; returns the result bitcast to float.
; The GFX11 checks expect the AND to be done with s_and_b32 on s0/s1, followed
; by a single v_mov_b32 of s0 into v0 for the return value.
109- define amdgpu_ps float @uniform_v_s_float (i32 inreg %a , i32 inreg %b ) {
110- ; GFX11-LABEL: uniform_v_s_float:
111- ; GFX11: ; %bb.0:
112- ; GFX11-NEXT: s_and_b32 s0, s0, s1
113- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
114- ; GFX11-NEXT: v_mov_b32_e32 v0, s0
115- ; GFX11-NEXT: ; return to shader part epilog
116- %and = and i32 %a , %b
117- %cast = bitcast i32 %and to float
118- ret float %cast
119- }
120-
12176define amdgpu_ps double @uniform_v_to_s_double (double inreg %a , double inreg %b ) {
12277; GFX11-LABEL: uniform_v_to_s_double:
12378; GFX11: ; %bb.0:
@@ -134,18 +89,6 @@ define amdgpu_ps double @uniform_v_to_s_double(double inreg %a, double inreg %b)
13489 ret double %max0
13590}
13691
; uniform_v_to_s_f32: amdgpu_ps function that takes two `float inreg` arguments
; and returns the result of llvm.maximum.f32 on them.
; The GFX11 checks expect the operands in s0/s1, a v_max_f32_e64 into v0, and a
; v_cmp_o_f32_e64 whose vcc_lo result drives a v_cndmask_b32 choosing between
; the literal 0x7fc00000 and the max result.
; NOTE(review): 0x7fc00000 is presumably the f32 quiet-NaN encoding -- confirm.
137- define amdgpu_ps float @uniform_v_to_s_f32 (float inreg %a , float inreg %b ) {
138- ; GFX11-LABEL: uniform_v_to_s_f32:
139- ; GFX11: ; %bb.0:
140- ; GFX11-NEXT: v_max_f32_e64 v0, s0, s1
141- ; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s1
142- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
143- ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
144- ; GFX11-NEXT: ; return to shader part epilog
145- %max0 = call float @llvm.maximum.f32 (float %a , float %b )
146- ret float %max0
147- }
148-
14992define amdgpu_ps <2 x i16 > @uniform_v_to_s_2_i16 (float inreg %a , float inreg %b ) {
15093; GFX11-LABEL: uniform_v_to_s_2_i16:
15194; GFX11: ; %bb.0:
@@ -173,3 +116,27 @@ define amdgpu_ps i16 @uniform_v_to_s_i16(half inreg %a, half inreg %b) {
173116 %cast = bitcast half %max to i16
174117 ret i16 %cast
175118}
119+
; uniform_add_i16_cast_to_f16: amdgpu_ps function that adds two `i16 inreg`
; arguments and returns the sum bitcast to half.
; The GFX11 checks expect the add to be done with s_add_i32 on s0/s1, followed
; by a single v_mov_b32 of s0 into v0 for the return value.
120+ define amdgpu_ps half @uniform_add_i16_cast_to_f16 (i16 inreg %a , i16 inreg %b ) {
121+ ; GFX11-LABEL: uniform_add_i16_cast_to_f16:
122+ ; GFX11: ; %bb.0:
123+ ; GFX11-NEXT: s_add_i32 s0, s0, s1
124+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
125+ ; GFX11-NEXT: v_mov_b32_e32 v0, s0
126+ ; GFX11-NEXT: ; return to shader part epilog
127+ %add = add i16 %a , %b
128+ %cast = bitcast i16 %add to half
129+ ret half %cast
130+ }
131+
; uniform_mul_i32_cast_to_float: amdgpu_ps function that multiplies two
; `i32 inreg` arguments and returns the product bitcast to float.
; The GFX11 checks expect the multiply to be done with s_mul_i32 on s0/s1,
; followed by a single v_mov_b32 of s0 into v0 for the return value.
132+ define amdgpu_ps float @uniform_mul_i32_cast_to_float (i32 inreg %a , i32 inreg %b ) {
133+ ; GFX11-LABEL: uniform_mul_i32_cast_to_float:
134+ ; GFX11: ; %bb.0:
135+ ; GFX11-NEXT: s_mul_i32 s0, s0, s1
136+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
137+ ; GFX11-NEXT: v_mov_b32_e32 v0, s0
138+ ; GFX11-NEXT: ; return to shader part epilog
139+ %mul = mul i32 %a , %b
140+ %cast = bitcast i32 %mul to float
141+ ret float %cast
142+ }
0 commit comments