@@ -111,75 +111,65 @@ define amdgpu_ps float @v_test_cvt_f32_bf16_v(float %src) {
111111define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v (<2 x double > %src ) {
112112; GFX-942-LABEL: v_test_cvt_v2f64_v2bf16_v:
113113; GFX-942: ; %bb.0:
114- ; GFX-942-NEXT: v_cvt_f32_f64_e64 v6, | v[0:1]|
114+ ; GFX-942-NEXT: v_cvt_f32_f64_e32 v6, v[0:1]
115115; GFX-942-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
116116; GFX-942-NEXT: v_and_b32_e32 v7, 1, v6
117- ; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
118- ; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], | v[0:1]| , v[4:5]
119- ; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc , 1, v7
117+ ; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, | v[4:5]|
118+ ; GFX-942-NEXT: v_cmp_nlg_f64_e32 vcc, v[0:1], v[4:5]
119+ ; GFX-942-NEXT: v_cmp_eq_u32_e64 s[0:1] , 1, v7
120120; GFX-942-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
121121; GFX-942-NEXT: v_add_u32_e32 v4, v6, v4
122- ; GFX-942-NEXT: s_or_b64 vcc, s[0:1], vcc
122+ ; GFX-942-NEXT: s_or_b64 vcc, vcc, s[0:1]
123123; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
124- ; GFX-942-NEXT: s_brev_b32 s4, 1
125- ; GFX-942-NEXT: v_and_or_b32 v5, v1, s4, v4
126- ; GFX-942-NEXT: v_bfe_u32 v4, v4, 16, 1
127- ; GFX-942-NEXT: s_movk_i32 s5, 0x7fff
128- ; GFX-942-NEXT: v_add3_u32 v4, v4, v5, s5
129- ; GFX-942-NEXT: v_or_b32_e32 v5, 0x400000, v5
124+ ; GFX-942-NEXT: v_bfe_u32 v5, v4, 16, 1
125+ ; GFX-942-NEXT: s_movk_i32 s4, 0x7fff
126+ ; GFX-942-NEXT: v_add3_u32 v5, v5, v4, s4
127+ ; GFX-942-NEXT: v_or_b32_e32 v4, 0x400000, v4
130128; GFX-942-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
131129; GFX-942-NEXT: s_nop 1
132- ; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v5 , vcc
133- ; GFX-942-NEXT: v_cvt_f32_f64_e64 v5, | v[2:3]|
130+ ; GFX-942-NEXT: v_cndmask_b32_e32 v4, v5, v4 , vcc
131+ ; GFX-942-NEXT: v_cvt_f32_f64_e32 v5, v[2:3]
134132; GFX-942-NEXT: v_cvt_f64_f32_e32 v[0:1], v5
135133; GFX-942-NEXT: v_and_b32_e32 v6, 1, v5
136- ; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[2:3]|, v[0:1]
137- ; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], | v[2:3]| , v[0:1]
138- ; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc , 1, v6
134+ ; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[2:3]|, | v[0:1]|
135+ ; GFX-942-NEXT: v_cmp_nlg_f64_e32 vcc, v[2:3], v[0:1]
136+ ; GFX-942-NEXT: v_cmp_eq_u32_e64 s[0:1] , 1, v6
139137; GFX-942-NEXT: v_cndmask_b32_e64 v0, -1, 1, s[2:3]
140138; GFX-942-NEXT: v_add_u32_e32 v0, v5, v0
141- ; GFX-942-NEXT: s_or_b64 vcc, s[0:1], vcc
139+ ; GFX-942-NEXT: s_or_b64 vcc, vcc, s[0:1]
142140; GFX-942-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
143- ; GFX-942-NEXT: v_and_or_b32 v1, v3, s4, v0
144- ; GFX-942-NEXT: v_bfe_u32 v0, v0, 16, 1
145- ; GFX-942-NEXT: v_add3_u32 v0, v0, v1, s5
146- ; GFX-942-NEXT: v_or_b32_e32 v1, 0x400000, v1
141+ ; GFX-942-NEXT: v_bfe_u32 v1, v0, 16, 1
142+ ; GFX-942-NEXT: v_add3_u32 v1, v1, v0, s4
143+ ; GFX-942-NEXT: v_or_b32_e32 v0, 0x400000, v0
147144; GFX-942-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[2:3]
148145; GFX-942-NEXT: s_mov_b32 s0, 0x7060302
149146; GFX-942-NEXT: s_nop 0
150- ; GFX-942-NEXT: v_cndmask_b32_e32 v0, v0, v1 , vcc
147+ ; GFX-942-NEXT: v_cndmask_b32_e32 v0, v1, v0 , vcc
151148; GFX-942-NEXT: v_perm_b32 v0, v0, v4, s0
152149; GFX-942-NEXT: ; return to shader part epilog
153150;
154151; GFX-950-LABEL: v_test_cvt_v2f64_v2bf16_v:
155152; GFX-950: ; %bb.0:
156- ; GFX-950-NEXT: v_mov_b32_e32 v4, v3
157- ; GFX-950-NEXT: v_and_b32_e32 v3, 0x7fffffff, v4
158- ; GFX-950-NEXT: v_mov_b32_e32 v5, v1
159- ; GFX-950-NEXT: v_cvt_f32_f64_e32 v1, v[2:3]
160- ; GFX-950-NEXT: v_cvt_f64_f32_e32 v[6:7], v1
161- ; GFX-950-NEXT: v_and_b32_e32 v8, 1, v1
162- ; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], v[2:3], v[6:7]
163- ; GFX-950-NEXT: v_cmp_nlg_f64_e32 vcc, v[2:3], v[6:7]
164- ; GFX-950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v8
153+ ; GFX-950-NEXT: v_cvt_f32_f64_e32 v6, v[2:3]
154+ ; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
155+ ; GFX-950-NEXT: v_and_b32_e32 v7, 1, v6
156+ ; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[2:3]|, |v[4:5]|
157+ ; GFX-950-NEXT: v_cmp_nlg_f64_e32 vcc, v[2:3], v[4:5]
158+ ; GFX-950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v7
165159; GFX-950-NEXT: v_cndmask_b32_e64 v2, -1, 1, s[2:3]
166- ; GFX-950-NEXT: v_add_u32_e32 v2, v1 , v2
160+ ; GFX-950-NEXT: v_add_u32_e32 v2, v6 , v2
167161; GFX-950-NEXT: s_or_b64 vcc, vcc, s[0:1]
168- ; GFX-950-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
169- ; GFX-950-NEXT: s_brev_b32 s4, 1
170- ; GFX-950-NEXT: v_and_or_b32 v4, v4, s4, v1
171- ; GFX-950-NEXT: v_and_b32_e32 v1, 0x7fffffff, v5
172- ; GFX-950-NEXT: v_cvt_f32_f64_e32 v6, v[0:1]
173- ; GFX-950-NEXT: v_cvt_f64_f32_e32 v[2:3], v6
174- ; GFX-950-NEXT: v_and_b32_e32 v7, 1, v6
175- ; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], v[0:1], v[2:3]
162+ ; GFX-950-NEXT: v_cvt_f32_f64_e32 v5, v[0:1]
163+ ; GFX-950-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc
164+ ; GFX-950-NEXT: v_cvt_f64_f32_e32 v[2:3], v5
165+ ; GFX-950-NEXT: v_and_b32_e32 v6, 1, v5
166+ ; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, |v[2:3]|
176167; GFX-950-NEXT: v_cmp_nlg_f64_e32 vcc, v[0:1], v[2:3]
177- ; GFX-950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v7
168+ ; GFX-950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v6
178169; GFX-950-NEXT: v_cndmask_b32_e64 v0, -1, 1, s[2:3]
179- ; GFX-950-NEXT: v_add_u32_e32 v0, v6 , v0
170+ ; GFX-950-NEXT: v_add_u32_e32 v0, v5 , v0
180171; GFX-950-NEXT: s_or_b64 vcc, vcc, s[0:1]
181- ; GFX-950-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
182- ; GFX-950-NEXT: v_and_or_b32 v0, v5, s4, v0
172+ ; GFX-950-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
183173; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, v4
184174; GFX-950-NEXT: ; return to shader part epilog
185175 %res = fptrunc <2 x double > %src to <2 x bfloat>
@@ -348,42 +338,38 @@ entry:
348338define amdgpu_ps void @fptrunc_f64_to_bf16 (double %a , ptr %out ) {
349339; GFX-942-LABEL: fptrunc_f64_to_bf16:
350340; GFX-942: ; %bb.0: ; %entry
351- ; GFX-942-NEXT: v_cvt_f32_f64_e64 v6, | v[0:1]|
341+ ; GFX-942-NEXT: v_cvt_f32_f64_e32 v6, v[0:1]
352342; GFX-942-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
353343; GFX-942-NEXT: v_and_b32_e32 v7, 1, v6
354- ; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
355- ; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], | v[0:1]| , v[4:5]
356- ; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc , 1, v7
344+ ; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, | v[4:5]|
345+ ; GFX-942-NEXT: v_cmp_nlg_f64_e32 vcc, v[0:1], v[4:5]
346+ ; GFX-942-NEXT: v_cmp_eq_u32_e64 s[0:1] , 1, v7
357347; GFX-942-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
358348; GFX-942-NEXT: v_add_u32_e32 v4, v6, v4
359- ; GFX-942-NEXT: s_or_b64 vcc, s[0:1], vcc
349+ ; GFX-942-NEXT: s_or_b64 vcc, vcc, s[0:1]
360350; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
361- ; GFX-942-NEXT: s_brev_b32 s0, 1
362- ; GFX-942-NEXT: v_and_or_b32 v5, v1, s0, v4
363- ; GFX-942-NEXT: v_bfe_u32 v4, v4, 16, 1
351+ ; GFX-942-NEXT: v_bfe_u32 v5, v4, 16, 1
364352; GFX-942-NEXT: s_movk_i32 s0, 0x7fff
365- ; GFX-942-NEXT: v_add3_u32 v4, v4, v5 , s0
366- ; GFX-942-NEXT: v_or_b32_e32 v5 , 0x400000, v5
353+ ; GFX-942-NEXT: v_add3_u32 v5, v5, v4 , s0
354+ ; GFX-942-NEXT: v_or_b32_e32 v4 , 0x400000, v4
367355; GFX-942-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1]
368356; GFX-942-NEXT: s_nop 1
369- ; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v5 , vcc
357+ ; GFX-942-NEXT: v_cndmask_b32_e32 v0, v5, v4 , vcc
370358; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0
371359; GFX-942-NEXT: s_endpgm
372360;
373361; GFX-950-LABEL: fptrunc_f64_to_bf16:
374362; GFX-950: ; %bb.0: ; %entry
375- ; GFX-950-NEXT: v_cvt_f32_f64_e64 v6, | v[0:1]|
363+ ; GFX-950-NEXT: v_cvt_f32_f64_e32 v6, v[0:1]
376364; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
377365; GFX-950-NEXT: v_and_b32_e32 v7, 1, v6
378- ; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
379- ; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], | v[0:1]| , v[4:5]
380- ; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc , 1, v7
366+ ; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, | v[4:5]|
367+ ; GFX-950-NEXT: v_cmp_nlg_f64_e32 vcc, v[0:1], v[4:5]
368+ ; GFX-950-NEXT: v_cmp_eq_u32_e64 s[0:1] , 1, v7
381369; GFX-950-NEXT: v_cndmask_b32_e64 v0, -1, 1, s[2:3]
382370; GFX-950-NEXT: v_add_u32_e32 v0, v6, v0
383- ; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
371+ ; GFX-950-NEXT: s_or_b64 vcc, vcc, s[0:1]
384372; GFX-950-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
385- ; GFX-950-NEXT: s_brev_b32 s0, 1
386- ; GFX-950-NEXT: v_and_or_b32 v0, v1, s0, v0
387373; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
388374; GFX-950-NEXT: flat_store_short v[2:3], v0
389375; GFX-950-NEXT: s_endpgm
@@ -396,44 +382,38 @@ entry:
396382define amdgpu_ps void @fptrunc_f64_to_bf16_neg (double %a , ptr %out ) {
397383; GFX-942-LABEL: fptrunc_f64_to_bf16_neg:
398384; GFX-942: ; %bb.0: ; %entry
399- ; GFX-942-NEXT: v_cvt_f32_f64_e64 v7, | v[0:1]|
400- ; GFX-942-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
401- ; GFX-942-NEXT: v_and_b32_e32 v8 , 1, v7
402- ; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
403- ; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], | v[0:1]| , v[4:5]
404- ; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
385+ ; GFX-942-NEXT: v_cvt_f32_f64_e64 v6, - v[0:1]
386+ ; GFX-942-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
387+ ; GFX-942-NEXT: v_and_b32_e32 v7 , 1, v6
388+ ; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, | v[4:5]|
389+ ; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], - v[0:1], v[4:5]
390+ ; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
405391; GFX-942-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
406- ; GFX-942-NEXT: v_add_u32_e32 v4, v7 , v4
392+ ; GFX-942-NEXT: v_add_u32_e32 v4, v6 , v4
407393; GFX-942-NEXT: s_or_b64 vcc, s[0:1], vcc
408- ; GFX-942-NEXT: s_brev_b32 s4, 1
409- ; GFX-942-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
410- ; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
411- ; GFX-942-NEXT: v_and_or_b32 v5, v6, s4, v4
412- ; GFX-942-NEXT: v_bfe_u32 v4, v4, 16, 1
394+ ; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
395+ ; GFX-942-NEXT: v_bfe_u32 v5, v4, 16, 1
413396; GFX-942-NEXT: s_movk_i32 s0, 0x7fff
414- ; GFX-942-NEXT: v_add3_u32 v4, v4, v5 , s0
415- ; GFX-942-NEXT: v_or_b32_e32 v5 , 0x400000, v5
397+ ; GFX-942-NEXT: v_add3_u32 v5, v5, v4 , s0
398+ ; GFX-942-NEXT: v_or_b32_e32 v4 , 0x400000, v4
416399; GFX-942-NEXT: v_cmp_u_f64_e64 vcc, -v[0:1], -v[0:1]
417400; GFX-942-NEXT: s_nop 1
418- ; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v5 , vcc
401+ ; GFX-942-NEXT: v_cndmask_b32_e32 v0, v5, v4 , vcc
419402; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0
420403; GFX-942-NEXT: s_endpgm
421404;
422405; GFX-950-LABEL: fptrunc_f64_to_bf16_neg:
423406; GFX-950: ; %bb.0: ; %entry
424- ; GFX-950-NEXT: v_cvt_f32_f64_e64 v7, | v[0:1]|
425- ; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
426- ; GFX-950-NEXT: v_and_b32_e32 v8 , 1, v7
427- ; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
428- ; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], | v[0:1]| , v[4:5]
429- ; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
407+ ; GFX-950-NEXT: v_cvt_f32_f64_e64 v6, - v[0:1]
408+ ; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
409+ ; GFX-950-NEXT: v_and_b32_e32 v7 , 1, v6
410+ ; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, | v[4:5]|
411+ ; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], - v[0:1], v[4:5]
412+ ; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
430413; GFX-950-NEXT: v_cndmask_b32_e64 v0, -1, 1, s[2:3]
431- ; GFX-950-NEXT: v_add_u32_e32 v0, v7 , v0
414+ ; GFX-950-NEXT: v_add_u32_e32 v0, v6 , v0
432415; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
433- ; GFX-950-NEXT: s_brev_b32 s4, 1
434- ; GFX-950-NEXT: v_xor_b32_e32 v6, 0x80000000, v1
435- ; GFX-950-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
436- ; GFX-950-NEXT: v_and_or_b32 v0, v6, s4, v0
416+ ; GFX-950-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
437417; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
438418; GFX-950-NEXT: flat_store_short v[2:3], v0
439419; GFX-950-NEXT: s_endpgm
@@ -447,44 +427,38 @@ entry:
447427define amdgpu_ps void @fptrunc_f64_to_bf16_abs (double %a , ptr %out ) {
448428; GFX-942-LABEL: fptrunc_f64_to_bf16_abs:
449429; GFX-942: ; %bb.0: ; %entry
450- ; GFX-942-NEXT: v_cvt_f32_f64_e64 v7 , |v[0:1]|
451- ; GFX-942-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
452- ; GFX-942-NEXT: v_and_b32_e32 v8 , 1, v7
453- ; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
430+ ; GFX-942-NEXT: v_cvt_f32_f64_e64 v6 , |v[0:1]|
431+ ; GFX-942-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
432+ ; GFX-942-NEXT: v_and_b32_e32 v7 , 1, v6
433+ ; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, | v[4:5]|
454434; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
455- ; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
435+ ; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
456436; GFX-942-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
457- ; GFX-942-NEXT: v_add_u32_e32 v4, v7 , v4
437+ ; GFX-942-NEXT: v_add_u32_e32 v4, v6 , v4
458438; GFX-942-NEXT: s_or_b64 vcc, s[0:1], vcc
459- ; GFX-942-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
460- ; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
461- ; GFX-942-NEXT: s_brev_b32 s0, 1
462- ; GFX-942-NEXT: v_and_or_b32 v5, v6, s0, v4
463- ; GFX-942-NEXT: v_bfe_u32 v4, v4, 16, 1
439+ ; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
440+ ; GFX-942-NEXT: v_bfe_u32 v5, v4, 16, 1
464441; GFX-942-NEXT: s_movk_i32 s0, 0x7fff
465- ; GFX-942-NEXT: v_add3_u32 v4, v4, v5 , s0
466- ; GFX-942-NEXT: v_or_b32_e32 v5 , 0x400000, v5
442+ ; GFX-942-NEXT: v_add3_u32 v5, v5, v4 , s0
443+ ; GFX-942-NEXT: v_or_b32_e32 v4 , 0x400000, v4
467444; GFX-942-NEXT: v_cmp_u_f64_e64 vcc, |v[0:1]|, |v[0:1]|
468445; GFX-942-NEXT: s_nop 1
469- ; GFX-942-NEXT: v_cndmask_b32_e32 v0, v4, v5 , vcc
446+ ; GFX-942-NEXT: v_cndmask_b32_e32 v0, v5, v4 , vcc
470447; GFX-942-NEXT: flat_store_short_d16_hi v[2:3], v0
471448; GFX-942-NEXT: s_endpgm
472449;
473450; GFX-950-LABEL: fptrunc_f64_to_bf16_abs:
474451; GFX-950: ; %bb.0: ; %entry
475- ; GFX-950-NEXT: v_cvt_f32_f64_e64 v7 , |v[0:1]|
476- ; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v7
477- ; GFX-950-NEXT: v_and_b32_e32 v8 , 1, v7
478- ; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, v[4:5]
452+ ; GFX-950-NEXT: v_cvt_f32_f64_e64 v6 , |v[0:1]|
453+ ; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
454+ ; GFX-950-NEXT: v_and_b32_e32 v7 , 1, v6
455+ ; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, | v[4:5]|
479456; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
480- ; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v8
457+ ; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
481458; GFX-950-NEXT: v_cndmask_b32_e64 v0, -1, 1, s[2:3]
482- ; GFX-950-NEXT: v_add_u32_e32 v0, v7 , v0
459+ ; GFX-950-NEXT: v_add_u32_e32 v0, v6 , v0
483460; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
484- ; GFX-950-NEXT: v_and_b32_e32 v6, 0x7fffffff, v1
485- ; GFX-950-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
486- ; GFX-950-NEXT: s_brev_b32 s0, 1
487- ; GFX-950-NEXT: v_and_or_b32 v0, v6, s0, v0
461+ ; GFX-950-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
488462; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
489463; GFX-950-NEXT: flat_store_short v[2:3], v0
490464; GFX-950-NEXT: s_endpgm
0 commit comments