@@ -1807,6 +1807,7 @@ define amdgpu_kernel void @fcmp_v2f16_lt(
18071807; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
18081808; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
18091809; GFX12-NEXT: v_cmp_lt_f16_e32 vcc_lo, v3, v2
1810+ ; GFX12-NEXT: s_wait_alu 0xfffd
18101811; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
18111812; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
18121813; GFX12-NEXT: s_endpgm
@@ -1940,6 +1941,7 @@ define amdgpu_kernel void @fcmp_v2f16_eq(
19401941; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
19411942; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
19421943; GFX12-NEXT: v_cmp_eq_f16_e32 vcc_lo, v3, v2
1944+ ; GFX12-NEXT: s_wait_alu 0xfffd
19431945; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
19441946; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
19451947; GFX12-NEXT: s_endpgm
@@ -2072,6 +2074,7 @@ define amdgpu_kernel void @fcmp_v2f16_le(
20722074; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
20732075; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
20742076; GFX12-NEXT: v_cmp_le_f16_e32 vcc_lo, v3, v2
2077+ ; GFX12-NEXT: s_wait_alu 0xfffd
20752078; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
20762079; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
20772080; GFX12-NEXT: s_endpgm
@@ -2204,6 +2207,7 @@ define amdgpu_kernel void @fcmp_v2f16_gt(
22042207; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
22052208; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
22062209; GFX12-NEXT: v_cmp_gt_f16_e32 vcc_lo, v3, v2
2210+ ; GFX12-NEXT: s_wait_alu 0xfffd
22072211; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
22082212; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
22092213; GFX12-NEXT: s_endpgm
@@ -2337,6 +2341,7 @@ define amdgpu_kernel void @fcmp_v2f16_lg(
23372341; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
23382342; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
23392343; GFX12-NEXT: v_cmp_lg_f16_e32 vcc_lo, v3, v2
2344+ ; GFX12-NEXT: s_wait_alu 0xfffd
23402345; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
23412346; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
23422347; GFX12-NEXT: s_endpgm
@@ -2470,6 +2475,7 @@ define amdgpu_kernel void @fcmp_v2f16_ge(
24702475; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
24712476; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
24722477; GFX12-NEXT: v_cmp_ge_f16_e32 vcc_lo, v3, v2
2478+ ; GFX12-NEXT: s_wait_alu 0xfffd
24732479; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
24742480; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
24752481; GFX12-NEXT: s_endpgm
@@ -2603,6 +2609,7 @@ define amdgpu_kernel void @fcmp_v2f16_o(
26032609; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
26042610; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
26052611; GFX12-NEXT: v_cmp_o_f16_e32 vcc_lo, v3, v2
2612+ ; GFX12-NEXT: s_wait_alu 0xfffd
26062613; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
26072614; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
26082615; GFX12-NEXT: s_endpgm
@@ -2736,6 +2743,7 @@ define amdgpu_kernel void @fcmp_v2f16_u(
27362743; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
27372744; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
27382745; GFX12-NEXT: v_cmp_u_f16_e32 vcc_lo, v3, v2
2746+ ; GFX12-NEXT: s_wait_alu 0xfffd
27392747; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
27402748; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
27412749; GFX12-NEXT: s_endpgm
@@ -2868,6 +2876,7 @@ define amdgpu_kernel void @fcmp_v2f16_nge(
28682876; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
28692877; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
28702878; GFX12-NEXT: v_cmp_nge_f16_e32 vcc_lo, v3, v2
2879+ ; GFX12-NEXT: s_wait_alu 0xfffd
28712880; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
28722881; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
28732882; GFX12-NEXT: s_endpgm
@@ -3000,6 +3009,7 @@ define amdgpu_kernel void @fcmp_v2f16_nlg(
30003009; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
30013010; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
30023011; GFX12-NEXT: v_cmp_nlg_f16_e32 vcc_lo, v3, v2
3012+ ; GFX12-NEXT: s_wait_alu 0xfffd
30033013; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
30043014; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
30053015; GFX12-NEXT: s_endpgm
@@ -3133,6 +3143,7 @@ define amdgpu_kernel void @fcmp_v2f16_ngt(
31333143; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
31343144; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
31353145; GFX12-NEXT: v_cmp_ngt_f16_e32 vcc_lo, v3, v2
3146+ ; GFX12-NEXT: s_wait_alu 0xfffd
31363147; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
31373148; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
31383149; GFX12-NEXT: s_endpgm
@@ -3265,6 +3276,7 @@ define amdgpu_kernel void @fcmp_v2f16_nle(
32653276; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
32663277; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
32673278; GFX12-NEXT: v_cmp_nle_f16_e32 vcc_lo, v3, v2
3279+ ; GFX12-NEXT: s_wait_alu 0xfffd
32683280; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
32693281; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
32703282; GFX12-NEXT: s_endpgm
@@ -3397,6 +3409,7 @@ define amdgpu_kernel void @fcmp_v2f16_neq(
33973409; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
33983410; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
33993411; GFX12-NEXT: v_cmp_neq_f16_e32 vcc_lo, v3, v2
3412+ ; GFX12-NEXT: s_wait_alu 0xfffd
34003413; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
34013414; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
34023415; GFX12-NEXT: s_endpgm
@@ -3529,6 +3542,7 @@ define amdgpu_kernel void @fcmp_v2f16_nlt(
35293542; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo
35303543; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
35313544; GFX12-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v3, v2
3545+ ; GFX12-NEXT: s_wait_alu 0xfffd
35323546; GFX12-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo
35333547; GFX12-NEXT: buffer_store_b64 v[0:1], off, s[8:11], null
35343548; GFX12-NEXT: s_endpgm
0 commit comments