|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
2 | 2 |
|
3 | | -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s |
4 | | -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s |
5 | | -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s |
6 | | -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s |
| 3 | +; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI %s |
| 4 | +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=VI %s |
| 5 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s |
| 6 | +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s |
7 | 7 |
|
8 | 8 | define half @bitcast_i16_to_f16(i16 %a, i32 %b) { |
9 | | -; GCN-LABEL: bitcast_i16_to_f16: |
10 | | -; GCN: ; %bb.0: |
11 | | -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
12 | | -; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v0 |
13 | | -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 |
14 | | -; GCN-NEXT: ; implicit-def: $vgpr0 |
15 | | -; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc |
16 | | -; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] |
17 | | -; GCN-NEXT: s_cbranch_execnz .LBB0_3 |
18 | | -; GCN-NEXT: ; %bb.1: ; %Flow |
19 | | -; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
20 | | -; GCN-NEXT: s_cbranch_execnz .LBB0_4 |
21 | | -; GCN-NEXT: .LBB0_2: ; %end |
22 | | -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] |
23 | | -; GCN-NEXT: s_setpc_b64 s[30:31] |
24 | | -; GCN-NEXT: .LBB0_3: ; %cmp.false |
25 | | -; GCN-NEXT: v_cvt_f32_f16_e32 v0, v2 |
26 | | -; GCN-NEXT: ; implicit-def: $vgpr2 |
27 | | -; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
28 | | -; GCN-NEXT: s_cbranch_execz .LBB0_2 |
29 | | -; GCN-NEXT: .LBB0_4: ; %cmp.true |
30 | | -; GCN-NEXT: v_add_i32_e32 v0, vcc, 3, v2 |
31 | | -; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 |
32 | | -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] |
33 | | -; GCN-NEXT: s_setpc_b64 s[30:31] |
| 9 | +; SI-LABEL: bitcast_i16_to_f16: |
| 10 | +; SI: ; %bb.0: |
| 11 | +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 12 | +; SI-NEXT: v_and_b32_e32 v2, 0xffff, v0 |
| 13 | +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 |
| 14 | +; SI-NEXT: ; implicit-def: $vgpr0 |
| 15 | +; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc |
| 16 | +; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] |
| 17 | +; SI-NEXT: s_cbranch_execnz .LBB0_3 |
| 18 | +; SI-NEXT: ; %bb.1: ; %Flow |
| 19 | +; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
| 20 | +; SI-NEXT: s_cbranch_execnz .LBB0_4 |
| 21 | +; SI-NEXT: .LBB0_2: ; %end |
| 22 | +; SI-NEXT: s_or_b64 exec, exec, s[4:5] |
| 23 | +; SI-NEXT: s_setpc_b64 s[30:31] |
| 24 | +; SI-NEXT: .LBB0_3: ; %cmp.false |
| 25 | +; SI-NEXT: v_cvt_f32_f16_e32 v0, v2 |
| 26 | +; SI-NEXT: ; implicit-def: $vgpr2 |
| 27 | +; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
| 28 | +; SI-NEXT: s_cbranch_execz .LBB0_2 |
| 29 | +; SI-NEXT: .LBB0_4: ; %cmp.true |
| 30 | +; SI-NEXT: v_add_i32_e32 v0, vcc, 3, v2 |
| 31 | +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| 32 | +; SI-NEXT: s_or_b64 exec, exec, s[4:5] |
| 33 | +; SI-NEXT: s_setpc_b64 s[30:31] |
34 | 34 | ; |
35 | 35 | ; VI-LABEL: bitcast_i16_to_f16: |
36 | 36 | ; VI: ; %bb.0: |
|
89 | 89 | } |
90 | 90 |
|
91 | 91 | define i16 @bitcast_f16_to_i16(half %a, i32 %b) { |
92 | | -; GCN-LABEL: bitcast_f16_to_i16: |
93 | | -; GCN: ; %bb.0: |
94 | | -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
95 | | -; GCN-NEXT: v_mov_b32_e32 v2, v0 |
96 | | -; GCN-NEXT: v_mov_b32_e32 v0, 0 |
97 | | -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 |
98 | | -; GCN-NEXT: v_cvt_f16_f32_e32 v1, v2 |
99 | | -; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc |
100 | | -; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] |
101 | | -; GCN-NEXT: s_cbranch_execnz .LBB1_3 |
102 | | -; GCN-NEXT: ; %bb.1: ; %Flow |
103 | | -; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
104 | | -; GCN-NEXT: s_cbranch_execnz .LBB1_4 |
105 | | -; GCN-NEXT: .LBB1_2: ; %end |
106 | | -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] |
107 | | -; GCN-NEXT: s_setpc_b64 s[30:31] |
108 | | -; GCN-NEXT: .LBB1_3: ; %cmp.false |
109 | | -; GCN-NEXT: v_mov_b32_e32 v0, v1 |
110 | | -; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
111 | | -; GCN-NEXT: s_cbranch_execz .LBB1_2 |
112 | | -; GCN-NEXT: .LBB1_4: ; %cmp.true |
113 | | -; GCN-NEXT: v_cvt_f32_f16_e32 v0, v1 |
114 | | -; GCN-NEXT: v_add_f32_e32 v0, 0x38000000, v0 |
115 | | -; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0 |
116 | | -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] |
117 | | -; GCN-NEXT: s_setpc_b64 s[30:31] |
| 92 | +; SI-LABEL: bitcast_f16_to_i16: |
| 93 | +; SI: ; %bb.0: |
| 94 | +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 95 | +; SI-NEXT: v_cvt_f16_f32_e32 v2, v0 |
| 96 | +; SI-NEXT: v_mov_b32_e32 v0, 0 |
| 97 | +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 |
| 98 | +; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc |
| 99 | +; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] |
| 100 | +; SI-NEXT: s_cbranch_execnz .LBB1_3 |
| 101 | +; SI-NEXT: ; %bb.1: ; %Flow |
| 102 | +; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
| 103 | +; SI-NEXT: s_cbranch_execnz .LBB1_4 |
| 104 | +; SI-NEXT: .LBB1_2: ; %end |
| 105 | +; SI-NEXT: s_or_b64 exec, exec, s[4:5] |
| 106 | +; SI-NEXT: s_setpc_b64 s[30:31] |
| 107 | +; SI-NEXT: .LBB1_3: ; %cmp.false |
| 108 | +; SI-NEXT: v_mov_b32_e32 v0, v2 |
| 109 | +; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
| 110 | +; SI-NEXT: s_cbranch_execz .LBB1_2 |
| 111 | +; SI-NEXT: .LBB1_4: ; %cmp.true |
| 112 | +; SI-NEXT: v_cvt_f32_f16_e32 v0, v2 |
| 113 | +; SI-NEXT: v_add_f32_e32 v0, 0x38000000, v0 |
| 114 | +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| 115 | +; SI-NEXT: s_or_b64 exec, exec, s[4:5] |
| 116 | +; SI-NEXT: s_setpc_b64 s[30:31] |
118 | 117 | ; |
119 | 118 | ; VI-LABEL: bitcast_f16_to_i16: |
120 | 119 | ; VI: ; %bb.0: |
@@ -173,21 +172,20 @@ end: |
173 | 172 | } |
174 | 173 |
|
175 | 174 | define bfloat @bitcast_i16_to_bf16(i16 %a, i32 %b) { |
176 | | -; GCN-LABEL: bitcast_i16_to_bf16: |
177 | | -; GCN: ; %bb.0: |
178 | | -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
179 | | -; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
180 | | -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 |
181 | | -; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
182 | | -; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc |
183 | | -; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] |
184 | | -; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
185 | | -; GCN-NEXT: s_cbranch_execz .LBB2_2 |
186 | | -; GCN-NEXT: ; %bb.1: ; %cmp.true |
187 | | -; GCN-NEXT: v_add_i32_e32 v0, vcc, 0x30000, v0 |
188 | | -; GCN-NEXT: .LBB2_2: ; %end |
189 | | -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] |
190 | | -; GCN-NEXT: s_setpc_b64 s[30:31] |
| 175 | +; SI-LABEL: bitcast_i16_to_bf16: |
| 176 | +; SI: ; %bb.0: |
| 177 | +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 178 | +; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| 179 | +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 |
| 180 | +; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
| 181 | +; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc |
| 182 | +; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] |
| 183 | +; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
| 184 | +; SI-NEXT: ; %bb.1: ; %cmp.true |
| 185 | +; SI-NEXT: v_add_i32_e32 v0, vcc, 0x30000, v0 |
| 186 | +; SI-NEXT: ; %bb.2: ; %end |
| 187 | +; SI-NEXT: s_or_b64 exec, exec, s[4:5] |
| 188 | +; SI-NEXT: s_setpc_b64 s[30:31] |
191 | 189 | ; |
192 | 190 | ; VI-LABEL: bitcast_i16_to_bf16: |
193 | 191 | ; VI: ; %bb.0: |
@@ -246,33 +244,33 @@ end: |
246 | 244 | } |
247 | 245 |
|
248 | 246 | define i16 @bitcast_bf16_to_i16(bfloat %a, i32 %b) { |
249 | | -; GCN-LABEL: bitcast_bf16_to_i16: |
250 | | -; GCN: ; %bb.0: |
251 | | -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
252 | | -; GCN-NEXT: v_mov_b32_e32 v2, v0 |
253 | | -; GCN-NEXT: v_mov_b32_e32 v0, 0 |
254 | | -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 |
255 | | -; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v2 |
256 | | -; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc |
257 | | -; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] |
258 | | -; GCN-NEXT: s_cbranch_execnz .LBB3_3 |
259 | | -; GCN-NEXT: ; %bb.1: ; %Flow |
260 | | -; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
261 | | -; GCN-NEXT: s_cbranch_execnz .LBB3_4 |
262 | | -; GCN-NEXT: .LBB3_2: ; %end |
263 | | -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] |
264 | | -; GCN-NEXT: s_setpc_b64 s[30:31] |
265 | | -; GCN-NEXT: .LBB3_3: ; %cmp.false |
266 | | -; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v1 |
267 | | -; GCN-NEXT: ; implicit-def: $vgpr1 |
268 | | -; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
269 | | -; GCN-NEXT: s_cbranch_execz .LBB3_2 |
270 | | -; GCN-NEXT: .LBB3_4: ; %cmp.true |
271 | | -; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v1 |
272 | | -; GCN-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 |
273 | | -; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
274 | | -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] |
275 | | -; GCN-NEXT: s_setpc_b64 s[30:31] |
| 247 | +; SI-LABEL: bitcast_bf16_to_i16: |
| 248 | +; SI: ; %bb.0: |
| 249 | +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 250 | +; SI-NEXT: v_mov_b32_e32 v2, v0 |
| 251 | +; SI-NEXT: v_mov_b32_e32 v0, 0 |
| 252 | +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 |
| 253 | +; SI-NEXT: v_mul_f32_e32 v1, 1.0, v2 |
| 254 | +; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc |
| 255 | +; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] |
| 256 | +; SI-NEXT: s_cbranch_execnz .LBB3_3 |
| 257 | +; SI-NEXT: ; %bb.1: ; %Flow |
| 258 | +; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
| 259 | +; SI-NEXT: s_cbranch_execnz .LBB3_4 |
| 260 | +; SI-NEXT: .LBB3_2: ; %end |
| 261 | +; SI-NEXT: s_or_b64 exec, exec, s[4:5] |
| 262 | +; SI-NEXT: s_setpc_b64 s[30:31] |
| 263 | +; SI-NEXT: .LBB3_3: ; %cmp.false |
| 264 | +; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v1 |
| 265 | +; SI-NEXT: ; implicit-def: $vgpr1 |
| 266 | +; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
| 267 | +; SI-NEXT: s_cbranch_execz .LBB3_2 |
| 268 | +; SI-NEXT: .LBB3_4: ; %cmp.true |
| 269 | +; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v1 |
| 270 | +; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 |
| 271 | +; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| 272 | +; SI-NEXT: s_or_b64 exec, exec, s[4:5] |
| 273 | +; SI-NEXT: s_setpc_b64 s[30:31] |
276 | 274 | ; |
277 | 275 | ; VI-LABEL: bitcast_bf16_to_i16: |
278 | 276 | ; VI: ; %bb.0: |
@@ -358,33 +356,33 @@ end: |
358 | 356 | } |
359 | 357 |
|
360 | 358 | define bfloat @bitcast_f16_to_bf16(half %a, i32 %b) { |
361 | | -; GCN-LABEL: bitcast_f16_to_bf16: |
362 | | -; GCN: ; %bb.0: |
363 | | -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
364 | | -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 |
365 | | -; GCN-NEXT: v_cvt_f16_f32_e32 v1, v0 |
366 | | -; GCN-NEXT: ; implicit-def: $vgpr0 |
367 | | -; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc |
368 | | -; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] |
369 | | -; GCN-NEXT: s_cbranch_execnz .LBB4_3 |
370 | | -; GCN-NEXT: ; %bb.1: ; %Flow |
371 | | -; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
372 | | -; GCN-NEXT: s_cbranch_execnz .LBB4_4 |
373 | | -; GCN-NEXT: .LBB4_2: ; %end |
374 | | -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] |
375 | | -; GCN-NEXT: s_setpc_b64 s[30:31] |
376 | | -; GCN-NEXT: .LBB4_3: ; %cmp.false |
377 | | -; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v1 |
378 | | -; GCN-NEXT: ; implicit-def: $vgpr1 |
379 | | -; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
380 | | -; GCN-NEXT: s_cbranch_execz .LBB4_2 |
381 | | -; GCN-NEXT: .LBB4_4: ; %cmp.true |
382 | | -; GCN-NEXT: v_cvt_f32_f16_e32 v0, v1 |
383 | | -; GCN-NEXT: v_add_f32_e32 v0, 0x38000000, v0 |
384 | | -; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0 |
385 | | -; GCN-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
386 | | -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] |
387 | | -; GCN-NEXT: s_setpc_b64 s[30:31] |
| 359 | +; SI-LABEL: bitcast_f16_to_bf16: |
| 360 | +; SI: ; %bb.0: |
| 361 | +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 362 | +; SI-NEXT: v_cvt_f16_f32_e32 v2, v0 |
| 363 | +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 |
| 364 | +; SI-NEXT: ; implicit-def: $vgpr0 |
| 365 | +; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc |
| 366 | +; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] |
| 367 | +; SI-NEXT: s_cbranch_execnz .LBB4_3 |
| 368 | +; SI-NEXT: ; %bb.1: ; %Flow |
| 369 | +; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
| 370 | +; SI-NEXT: s_cbranch_execnz .LBB4_4 |
| 371 | +; SI-NEXT: .LBB4_2: ; %end |
| 372 | +; SI-NEXT: s_or_b64 exec, exec, s[4:5] |
| 373 | +; SI-NEXT: s_setpc_b64 s[30:31] |
| 374 | +; SI-NEXT: .LBB4_3: ; %cmp.false |
| 375 | +; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v2 |
| 376 | +; SI-NEXT: ; implicit-def: $vgpr2 |
| 377 | +; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
| 378 | +; SI-NEXT: s_cbranch_execz .LBB4_2 |
| 379 | +; SI-NEXT: .LBB4_4: ; %cmp.true |
| 380 | +; SI-NEXT: v_cvt_f32_f16_e32 v0, v2 |
| 381 | +; SI-NEXT: v_add_f32_e32 v0, 0x38000000, v0 |
| 382 | +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| 383 | +; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
| 384 | +; SI-NEXT: s_or_b64 exec, exec, s[4:5] |
| 385 | +; SI-NEXT: s_setpc_b64 s[30:31] |
388 | 386 | ; |
389 | 387 | ; VI-LABEL: bitcast_f16_to_bf16: |
390 | 388 | ; VI: ; %bb.0: |
@@ -443,34 +441,34 @@ end: |
443 | 441 | } |
444 | 442 |
|
445 | 443 | define half @bitcast_bf16_to_f16(bfloat %a, i32 %b) { |
446 | | -; GCN-LABEL: bitcast_bf16_to_f16: |
447 | | -; GCN: ; %bb.0: |
448 | | -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
449 | | -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 |
450 | | -; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v0 |
451 | | -; GCN-NEXT: ; implicit-def: $vgpr0 |
452 | | -; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc |
453 | | -; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] |
454 | | -; GCN-NEXT: s_cbranch_execnz .LBB5_3 |
455 | | -; GCN-NEXT: ; %bb.1: ; %Flow |
456 | | -; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
457 | | -; GCN-NEXT: s_cbranch_execnz .LBB5_4 |
458 | | -; GCN-NEXT: .LBB5_2: ; %end |
459 | | -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] |
460 | | -; GCN-NEXT: s_setpc_b64 s[30:31] |
461 | | -; GCN-NEXT: .LBB5_3: ; %cmp.false |
462 | | -; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v1 |
463 | | -; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 |
464 | | -; GCN-NEXT: ; implicit-def: $vgpr1 |
465 | | -; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
466 | | -; GCN-NEXT: s_cbranch_execz .LBB5_2 |
467 | | -; GCN-NEXT: .LBB5_4: ; %cmp.true |
468 | | -; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v1 |
469 | | -; GCN-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 |
470 | | -; GCN-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
471 | | -; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 |
472 | | -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] |
473 | | -; GCN-NEXT: s_setpc_b64 s[30:31] |
| 444 | +; SI-LABEL: bitcast_bf16_to_f16: |
| 445 | +; SI: ; %bb.0: |
| 446 | +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 447 | +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 |
| 448 | +; SI-NEXT: v_mul_f32_e32 v1, 1.0, v0 |
| 449 | +; SI-NEXT: ; implicit-def: $vgpr0 |
| 450 | +; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc |
| 451 | +; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] |
| 452 | +; SI-NEXT: s_cbranch_execnz .LBB5_3 |
| 453 | +; SI-NEXT: ; %bb.1: ; %Flow |
| 454 | +; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
| 455 | +; SI-NEXT: s_cbranch_execnz .LBB5_4 |
| 456 | +; SI-NEXT: .LBB5_2: ; %end |
| 457 | +; SI-NEXT: s_or_b64 exec, exec, s[4:5] |
| 458 | +; SI-NEXT: s_setpc_b64 s[30:31] |
| 459 | +; SI-NEXT: .LBB5_3: ; %cmp.false |
| 460 | +; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v1 |
| 461 | +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| 462 | +; SI-NEXT: ; implicit-def: $vgpr1 |
| 463 | +; SI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] |
| 464 | +; SI-NEXT: s_cbranch_execz .LBB5_2 |
| 465 | +; SI-NEXT: .LBB5_4: ; %cmp.true |
| 466 | +; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v1 |
| 467 | +; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0 |
| 468 | +; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| 469 | +; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 |
| 470 | +; SI-NEXT: s_or_b64 exec, exec, s[4:5] |
| 471 | +; SI-NEXT: s_setpc_b64 s[30:31] |
474 | 472 | ; |
475 | 473 | ; VI-LABEL: bitcast_bf16_to_f16: |
476 | 474 | ; VI: ; %bb.0: |
|
0 commit comments