@@ -344,7 +344,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) {
344
344
; GFX1250: ; %bb.0: ; %entry
345
345
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
346
346
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
347
- ; GFX1250-NEXT: flat_store_b16 v[2:3], v0
347
+ ; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
348
348
; GFX1250-NEXT: s_endpgm
349
349
entry:
350
350
%a.cvt = fptrunc float %a to bfloat
@@ -380,7 +380,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) {
380
380
; GFX1250: ; %bb.0: ; %entry
381
381
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
382
382
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, |v0|, s0
383
- ; GFX1250-NEXT: flat_store_b16 v[2:3], v0
383
+ ; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
384
384
; GFX1250-NEXT: s_endpgm
385
385
entry:
386
386
%a.abs = call float @llvm.fabs.f32 (float %a )
@@ -417,7 +417,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) {
417
417
; GFX1250: ; %bb.0: ; %entry
418
418
; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
419
419
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, -v0, s0
420
- ; GFX1250-NEXT: flat_store_b16 v[2:3], v0
420
+ ; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
421
421
; GFX1250-NEXT: s_endpgm
422
422
entry:
423
423
%a.neg = fneg float %a
@@ -480,7 +480,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) {
480
480
; GFX1250-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
481
481
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
482
482
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
483
- ; GFX1250-NEXT: flat_store_b16 v[2:3], v0
483
+ ; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
484
484
; GFX1250-NEXT: s_endpgm
485
485
entry:
486
486
%a.cvt = fptrunc double %a to bfloat
@@ -543,7 +543,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) {
543
543
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
544
544
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
545
545
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
546
- ; GFX1250-NEXT: flat_store_b16 v[2:3], v0
546
+ ; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
547
547
; GFX1250-NEXT: s_endpgm
548
548
entry:
549
549
%a.neg = fneg double %a
@@ -607,7 +607,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
607
607
; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
608
608
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
609
609
; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
610
- ; GFX1250-NEXT: flat_store_b16 v[2:3], v0
610
+ ; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
611
611
; GFX1250-NEXT: s_endpgm
612
612
entry:
613
613
%a.abs = call double @llvm.fabs.f64 (double %a )
0 commit comments