@@ -344,7 +344,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16(float %a, ptr %out) {
344344; GFX1250: ; %bb.0: ; %entry
345345; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
346346; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
347- ; GFX1250-NEXT: flat_store_b16 v[2:3], v0
347+ ; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
348348; GFX1250-NEXT: s_endpgm
349349entry:
350350 %a.cvt = fptrunc float %a to bfloat
@@ -380,7 +380,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_abs(float %a, ptr %out) {
380380; GFX1250: ; %bb.0: ; %entry
381381; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
382382; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, |v0|, s0
383- ; GFX1250-NEXT: flat_store_b16 v[2:3], v0
383+ ; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
384384; GFX1250-NEXT: s_endpgm
385385entry:
386386 %a.abs = call float @llvm.fabs.f32 (float %a )
@@ -417,7 +417,7 @@ define amdgpu_ps void @fptrunc_f32_to_bf16_neg(float %a, ptr %out) {
417417; GFX1250: ; %bb.0: ; %entry
418418; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
419419; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, -v0, s0
420- ; GFX1250-NEXT: flat_store_b16 v[2:3], v0
420+ ; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
421421; GFX1250-NEXT: s_endpgm
422422entry:
423423 %a.neg = fneg float %a
@@ -480,7 +480,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16(double %a, ptr %out) {
480480; GFX1250-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
481481; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
482482; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
483- ; GFX1250-NEXT: flat_store_b16 v[2:3], v0
483+ ; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
484484; GFX1250-NEXT: s_endpgm
485485entry:
486486 %a.cvt = fptrunc double %a to bfloat
@@ -543,7 +543,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_neg(double %a, ptr %out) {
543543; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
544544; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
545545; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
546- ; GFX1250-NEXT: flat_store_b16 v[2:3], v0
546+ ; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
547547; GFX1250-NEXT: s_endpgm
548548entry:
549549 %a.neg = fneg double %a
@@ -607,7 +607,7 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
607607; GFX1250-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc_lo
608608; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
609609; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
610- ; GFX1250-NEXT: flat_store_b16 v[2:3], v0
610+ ; GFX1250-NEXT: flat_store_b16 v[2:3], v0 scope:SCOPE_SE
611611; GFX1250-NEXT: s_endpgm
612612entry:
613613 %a.abs = call double @llvm.fabs.f64 (double %a )
0 commit comments