Skip to content

Commit c720140

Browse files
add more test
1 parent 0943e98 commit c720140

File tree

1 file changed

+73
-0
lines changed

1 file changed

+73
-0
lines changed

llvm/test/CodeGen/AMDGPU/uniform-vgpr-to-sgpr-return.ll

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,76 @@ define amdgpu_ps ptr @uniform_v_to_s_ptr(ptr inreg %x) {
7272
%ptr = inttoptr i32 %int to ptr
7373
ret ptr %ptr
7474
}
75+
76+
; Test: llvm.maximum.f16 applied to two `inreg` half arguments, with the result
; returned directly from an amdgpu_ps function.
; The expected GFX11 code computes v_max_f16 into v0, performs an ordered
; compare (v_cmp_o_f16) of the two inputs into vcc_lo, and then uses
; v_cndmask_b32 to choose between the constant 0x7e00 and the max in v0.
; The value is left in v0; no v_readfirstlane_b32 is expected before the epilog.
; NOTE(review): 0x7e00 is presumably the f16 quiet-NaN pattern substituted when
; the inputs compare unordered -- confirm against the llvm.maximum lowering.
define amdgpu_ps half @uniform_v_to_s_f16(half inreg %a, half inreg %b) {
77+
; GFX11-LABEL: uniform_v_to_s_f16:
78+
; GFX11: ; %bb.0:
79+
; GFX11-NEXT: v_max_f16_e64 v0, s0, s1
80+
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
81+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
82+
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
83+
; GFX11-NEXT: ; return to shader part epilog
84+
%max = call half @llvm.maximum.f16(half %a, half %b)
85+
ret half %max
86+
}
87+
88+
; Test: llvm.maximum on two `inreg` <2 x half> arguments; the packed result is
; bitcast to float and returned from an amdgpu_ps function.
; The expected GFX11 code uses one v_pk_max_f16 for both lanes, then handles
; each 16-bit half separately: an ordered compare (v_cmp_o_f16) on the low
; halves and another on the high halves (extracted with s_lshr_b32 by 16), each
; feeding a v_cndmask_b32 that chooses between 0x7e00 and the max. The two
; halves are repacked with v_and_b32 / v_lshl_or_b32 and the value stays in v0.
; NOTE(review): 0x7e00 is presumably the f16 quiet-NaN pattern substituted for
; unordered lanes -- confirm against the llvm.maximum lowering.
; The intrinsic is spelled with the .v2f16 suffix so that its mangled name
; matches the <2 x half> operand type.
define amdgpu_ps float @uniform_v_to_s_v2f16(<2 x half> inreg %a, <2 x half> inreg %b) {
89+
; GFX11-LABEL: uniform_v_to_s_v2f16:
90+
; GFX11: ; %bb.0:
91+
; GFX11-NEXT: v_pk_max_f16 v0, s0, s1
92+
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s1
93+
; GFX11-NEXT: s_lshr_b32 s2, s1, 16
94+
; GFX11-NEXT: s_lshr_b32 s0, s0, 16
95+
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
96+
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
97+
; GFX11-NEXT: v_cmp_o_f16_e64 vcc_lo, s0, s2
98+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
99+
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
100+
; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
101+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
102+
; GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
103+
; GFX11-NEXT: ; return to shader part epilog
104+
%max = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
105+
%cast = bitcast <2 x half> %max to float
106+
ret float %cast
107+
}
108+
109+
; Test: bitwise AND of two `inreg` i32 arguments, bitcast to float and returned
; from an amdgpu_ps function.
; The expected GFX11 code performs the AND on the scalar unit (s_and_b32 into
; s0) and then copies the result into v0 with v_mov_b32 before the epilog.
define amdgpu_ps float @uniform_v_s_float(i32 inreg %a, i32 inreg %b) {
110+
; GFX11-LABEL: uniform_v_s_float:
111+
; GFX11: ; %bb.0:
112+
; GFX11-NEXT: s_and_b32 s0, s0, s1
113+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
114+
; GFX11-NEXT: v_mov_b32_e32 v0, s0
115+
; GFX11-NEXT: ; return to shader part epilog
116+
%and = and i32 %a, %b
117+
%cast = bitcast i32 %and to float
118+
ret float %cast
119+
}
120+
121+
; Test: llvm.maximum.f64 applied to two `inreg` double arguments, with the
; result returned directly from an amdgpu_ps function.
; The expected GFX11 code computes v_max_f64 into v[0:1], performs an unordered
; compare (v_cmp_u_f64) into s0, and uses two v_cndmask_b32 instructions to
; replace the high dword with 0x7ff80000 and the low dword with 0 when s0 is
; set. Both dwords are then moved into s1 and s0 with v_readfirstlane_b32
; before the epilog.
; NOTE(review): 0x7ff80000:0 is presumably the f64 quiet-NaN pattern used for
; unordered inputs -- confirm against the llvm.maximum lowering.
define amdgpu_ps double @uniform_v_to_s_double(double inreg %a, double inreg %b) {
122+
; GFX11-LABEL: uniform_v_to_s_double:
123+
; GFX11: ; %bb.0:
124+
; GFX11-NEXT: v_max_f64 v[0:1], s[0:1], s[2:3]
125+
; GFX11-NEXT: v_cmp_u_f64_e64 s0, s[0:1], s[2:3]
126+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
127+
; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, 0x7ff80000, s0
128+
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, 0, s0
129+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
130+
; GFX11-NEXT: v_readfirstlane_b32 s1, v1
131+
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
132+
; GFX11-NEXT: ; return to shader part epilog
133+
%max0 = call double @llvm.maximum.f64(double %a, double %b)
134+
ret double %max0
135+
}
136+
137+
; Test: llvm.maximum.f32 applied to two `inreg` float arguments, with the
; result returned directly from an amdgpu_ps function.
; The expected GFX11 code computes v_max_f32 into v0, performs an ordered
; compare (v_cmp_o_f32) of the two inputs into vcc_lo, and then uses
; v_cndmask_b32 to choose between the constant 0x7fc00000 and the max in v0.
; The value is left in v0; no v_readfirstlane_b32 is expected before the epilog.
; NOTE(review): 0x7fc00000 is presumably the f32 quiet-NaN pattern substituted
; when the inputs compare unordered -- confirm against the llvm.maximum lowering.
define amdgpu_ps float @uniform_v_to_s_f32(float inreg %a, float inreg %b) {
138+
; GFX11-LABEL: uniform_v_to_s_f32:
139+
; GFX11: ; %bb.0:
140+
; GFX11-NEXT: v_max_f32_e64 v0, s0, s1
141+
; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s1
142+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
143+
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo
144+
; GFX11-NEXT: ; return to shader part epilog
145+
%max0 = call float @llvm.maximum.f32(float %a, float %b)
146+
ret float %max0
147+
}

0 commit comments

Comments
 (0)