@@ -9,6 +9,172 @@ declare half @llvm.amdgcn.cvt.f16.fp8(i32, i32)
99declare <2 x half > @llvm.amdgcn.cvt.pk.f16.bf8 (i16 )
1010declare <2 x half > @llvm.amdgcn.cvt.pk.f16.fp8 (i16 )
1111
; Convert one byte of %a from bf8 to half using selector immediate 0, then
; widen the result to float for the shader return value. The expected code
; below uses the e32 encoding with no byte_sel modifier for this selector.
define amdgpu_ps float @test_cvt_f16_bf8_byte0(i32 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte0:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e32 v0.l, v0
; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte0:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e32 v0, v0
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte0:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e32 v0.l, v0
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte0:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e32 v0, v0
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
  %h = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 0)
  %f = fpext half %h to float
  ret float %f
}
44+
; Convert one byte of %a from bf8 to half using selector immediate 1, then
; widen the result to float for the shader return value. The expected code
; below carries the selector as the byte_sel:1 modifier on the e64 encoding.
define amdgpu_ps float @test_cvt_f16_bf8_byte1(i32 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte1:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:1
; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte1:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:1
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte1:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:1
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte1:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:1
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
  %h = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 1)
  %f = fpext half %h to float
  ret float %f
}
77+
; Convert one byte of %a from bf8 to half using selector immediate 2, then
; widen the result to float for the shader return value. The expected code
; below carries the selector as the byte_sel:2 modifier on the e64 encoding.
define amdgpu_ps float @test_cvt_f16_bf8_byte2(i32 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte2:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:2
; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte2:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:2
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte2:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:2
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte2:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:2
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
  %h = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 2)
  %f = fpext half %h to float
  ret float %f
}
110+
; Convert one byte of %a from bf8 to half using selector immediate 3, then
; widen the result to float for the shader return value. The expected code
; below carries the selector as the byte_sel:3 modifier on the e64 encoding.
define amdgpu_ps float @test_cvt_f16_bf8_byte3(i32 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte3:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
; GFX1250-SDAG-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte3:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte3:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f32_f16_e32 v0, v0.l
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte3:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
  %h = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 3)
  %f = fpext half %h to float
  ret float %f
}
143+
; Convert with selector immediate 3 and place the half result in element 1
; (the high 16 bits) of a <2 x half> whose element 0 is 0.0, returning the
; packed pair bitcast to float. In the expected code below, only the SDAG
; true16 run writes the conversion straight into v0.h; the other runs convert
; into the low half and pack with a second instruction.
;
; The vector seed is poison rather than the deprecated undef. Both lanes are
; overwritten before use, so the expected code is assumed unchanged; re-run
; update_llc_test_checks.py to confirm.
define amdgpu_ps float @test_cvt_f16_bf8_byte3_hi(i32 %a) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_bf8_byte3_hi:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.h, v0 byte_sel:3
; GFX1250-SDAG-REAL16-NEXT: v_mov_b16_e32 v0.l, 0
; GFX1250-SDAG-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_f16_bf8_byte3_hi:
; GFX1250-SDAG-FAKE16: ; %bb.0:
; GFX1250-SDAG-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
; GFX1250-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, 0, 0x5040100
; GFX1250-SDAG-FAKE16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-REAL16-LABEL: test_cvt_f16_bf8_byte3_hi:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_f16_bf8_e64 v0.l, v0 byte_sel:3
; GFX1250-GISEL-REAL16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-REAL16-NEXT: v_lshl_or_b32 v0, v0, 16, 0
; GFX1250-GISEL-REAL16-NEXT: ; return to shader part epilog
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_f16_bf8_byte3_hi:
; GFX1250-GISEL-FAKE16: ; %bb.0:
; GFX1250-GISEL-FAKE16-NEXT: v_cvt_f16_bf8_e64 v0, v0 byte_sel:3
; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-FAKE16-NEXT: v_lshl_or_b32 v0, v0, 16, 0
; GFX1250-GISEL-FAKE16-NEXT: ; return to shader part epilog
  %cvt = tail call half @llvm.amdgcn.cvt.f16.bf8(i32 %a, i32 3)
  %ins.0 = insertelement <2 x half> poison, half 0.0, i32 0
  %ins.1 = insertelement <2 x half> %ins.0, half %cvt, i32 1
  %ret = bitcast <2 x half> %ins.1 to float
  ret float %ret
}
177+
12178define amdgpu_ps float @test_cvt_f16_fp8_byte0 (i32 %a ) {
13179; GFX1250-SDAG-REAL16-LABEL: test_cvt_f16_fp8_byte0:
14180; GFX1250-SDAG-REAL16: ; %bb.0:
0 commit comments