@@ -72,3 +72,76 @@ define amdgpu_ps ptr @uniform_v_to_s_ptr(ptr inreg %x) {
7272 %ptr = inttoptr i32 %int to ptr
7373 ret ptr %ptr
7474}
75+
; llvm.maximum on two half operands that are both passed inreg to an
; amdgpu_ps function. The expected ISA feeds s0/s1 straight into the VALU
; max and the ordered compare, then keeps the max only when the compare
; succeeded and otherwise selects the constant 0x7e00.
define amdgpu_ps half @uniform_v_to_s_f16(half inreg %a, half inreg %b) {
; GFX11-LABEL: uniform_v_to_s_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_max_f16_e64 v0, s0, s1
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
; GFX11-NEXT: ; return to shader part epilog
  %result = call half @llvm.maximum.f16(half %a, half %b)
  ret half %result
}
87+
; llvm.maximum on two <2 x half> operands that are both passed inreg to an
; amdgpu_ps function; the packed result is returned bitcast to float.
; The expected ISA does one packed max, then an ordered compare and select
; per 16-bit half (low halves from s0/s1, high halves after shifting right
; by 16), and finally re-packs both halves with v_lshl_or_b32.
define amdgpu_ps float @uniform_v_to_s_v2f16(<2 x half> inreg %a, <2 x half> inreg %b) {
; GFX11-LABEL: uniform_v_to_s_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_pk_max_f16 v0, s0, s1
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
; GFX11-NEXT: s_lshr_b32 s2, s1, 16
; GFX11-NEXT: s_lshr_b32 s0, s0, 16
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT: ; return to shader part epilog
  ; The overload suffix must name the operand type: v2f16 for <2 x half>.
  %max = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
  %cast = bitcast <2 x half> %max to float
  ret float %cast
}
108+
; Bitwise AND of two i32 operands passed inreg to an amdgpu_ps function,
; returned reinterpreted as float. The expected ISA performs the AND on
; the scalar unit (s_and_b32) and then copies the result into v0.
define amdgpu_ps float @uniform_v_s_float(i32 inreg %a, i32 inreg %b) {
; GFX11-LABEL: uniform_v_s_float:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_b32 s0, s0, s1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v0, s0
; GFX11-NEXT: ; return to shader part epilog
  %masked = and i32 %a, %b
  %as.float = bitcast i32 %masked to float
  ret float %as.float
}
120+
; llvm.maximum on two double operands that are both passed inreg to an
; amdgpu_ps function. The expected ISA computes the max and an unordered
; compare on the VALU from s[0:1]/s[2:3]; when the compare is true it
; replaces the result with the 64-bit pattern 0x7ff80000:0x00000000, and
; it then moves both result dwords into s0/s1 via v_readfirstlane_b32.
define amdgpu_ps double @uniform_v_to_s_double(double inreg %a, double inreg %b) {
; GFX11-LABEL: uniform_v_to_s_double:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_max_f64 v[0:1], s[0:1], s[2:3]
; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[2:3]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x7ff80000, s0
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, 0, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %result = call double @llvm.maximum.f64(double %a, double %b)
  ret double %result
}
136+
; llvm.maximum on two float operands that are both passed inreg to an
; amdgpu_ps function. The expected ISA feeds s0/s1 straight into the VALU
; max and the ordered compare, then keeps the max only when the compare
; succeeded and otherwise selects the constant 0x7fc00000.
define amdgpu_ps float @uniform_v_to_s_f32(float inreg %a, float inreg %b) {
; GFX11-LABEL: uniform_v_to_s_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_max_f32_e64 v0, s0, s1
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
; GFX11-NEXT: ; return to shader part epilog
  %result = call float @llvm.maximum.f32(float %a, float %b)
  ret float %result
}
0 commit comments