; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
33
; Truncate a single float to bfloat and store it through a global pointer.
; The expected gfx90a sequence performs the conversion on the raw float bits:
; it adds 0x7fff plus bit 16 of the input to the input, substitutes the input
; ORed with 0x400000 when the input compares unordered with itself (NaN), and
; writes the upper 16 bits of the selected value with
; global_store_short_d16_hi.
define void @scalar(float %num, ptr addrspace(1) %p) {
; CHECK-LABEL: scalar:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v3, v2
; CHECK-NEXT:    v_mov_b32_e32 v2, v1
; CHECK-NEXT:    v_bfe_u32 v1, v0, 16, 1
; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
; CHECK-NEXT:    v_add3_u32 v1, v1, v0, s4
; CHECK-NEXT:    v_or_b32_e32 v4, 0x400000, v0
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v1, v4, vcc
; CHECK-NEXT:    global_store_short_d16_hi v[2:3], v0, off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %conv = fptrunc float %num to bfloat
  store bfloat %conv, ptr addrspace(1) %p, align 8
  ret void
}
24+
425define void @v2 (<2 x float > %num , ptr addrspace (1 ) %p ) {
526; CHECK-LABEL: v2:
627; CHECK: ; %bb.0: ; %entry
@@ -27,6 +48,40 @@ entry:
2748 ret void
2849}
2950
; Truncate a <3 x float> to <3 x bfloat> and store it through a global pointer.
; The expected gfx90a sequence converts each lane with the same
; bfe/add3/or/cmp_u/cndmask pattern, reusing s4 = 0x7fff for all three lanes.
; The first two results are combined into one dword by v_perm_b32 with
; selector 0x7060302; the third result is written as a 16-bit store at
; offset 4, followed by the dword store at offset 0.
define void @v3(<3 x float> %num, ptr addrspace(1) %p) {
; CHECK-LABEL: v3:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v5, v4
; CHECK-NEXT:    v_mov_b32_e32 v4, v3
; CHECK-NEXT:    v_bfe_u32 v3, v0, 16, 1
; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
; CHECK-NEXT:    v_add3_u32 v3, v3, v0, s4
; CHECK-NEXT:    v_or_b32_e32 v6, 0x400000, v0
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v0, v0
; CHECK-NEXT:    v_cndmask_b32_e32 v0, v3, v6, vcc
; CHECK-NEXT:    v_bfe_u32 v3, v1, 16, 1
; CHECK-NEXT:    v_add3_u32 v3, v3, v1, s4
; CHECK-NEXT:    v_or_b32_e32 v6, 0x400000, v1
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v1, v1
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
; CHECK-NEXT:    s_mov_b32 s5, 0x7060302
; CHECK-NEXT:    v_perm_b32 v0, v1, v0, s5
; CHECK-NEXT:    v_bfe_u32 v1, v2, 16, 1
; CHECK-NEXT:    v_add3_u32 v1, v1, v2, s4
; CHECK-NEXT:    v_or_b32_e32 v3, 0x400000, v2
; CHECK-NEXT:    v_cmp_u_f32_e32 vcc, v2, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; CHECK-NEXT:    global_store_short_d16_hi v[4:5], v1, off offset:4
; CHECK-NEXT:    global_store_dword v[4:5], v0, off
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %conv = fptrunc <3 x float> %num to <3 x bfloat>
  store <3 x bfloat> %conv, ptr addrspace(1) %p, align 8
  ret void
}
84+
3085define void @v4 (<4 x float > %num , ptr addrspace (1 ) %p ) {
3186; CHECK-LABEL: v4:
3287; CHECK: ; %bb.0: ; %entry
0 commit comments