@@ -1334,92 +1334,82 @@ main_body:
13341334  ret  void 
13351335}
13361336
; Test: flat_atomic_fadd_f64_noret_pat
; IR under test: a seq_cst `atomicrmw fadd` of double 4.0 through %ptr with no
; syncscope. %ret is never used; only `ret void` follows it. The instruction
; carries !noalias.addrspace and !amdgpu.no.fine.grained.memory metadata (the
; definitions of !0 and !1 are not visible in this excerpt).
; Diff shown here: the `-` lines are the previous `amdgpu_kernel` form, whose
; checks loaded s[0:1] with s_load_dwordx2, copied it into v[0:1] and ended
; with s_endpgm. The `+` lines drop `amdgpu_kernel`; their checks open with a
; full s_waitcnt, use v[0:1] without that copy and end with
; s_setpc_b64 s[30:31].
; Unchanged in both forms: the GFX90A checks place buffer_wbl2 before the
; atomic and buffer_invl2 + buffer_wbinvl1_vol after it; the GFX942 checks use
; buffer_wbl2 sc0 sc1, an `sc1` modifier on the atomic, and buffer_inv sc0 sc1.
1337- define  amdgpu_kernel  void  @flat_atomic_fadd_f64_noret_pat (ptr  %ptr ) #1  {
1337+ define  void  @flat_atomic_fadd_f64_noret_pat (ptr  %ptr ) #1  {
13381338; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat: 
13391339; GFX90A:       ; %bb.0: ; %main_body 
1340- ; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24  
1340+ ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  
13411341; GFX90A-NEXT:    v_mov_b32_e32 v2, 0 
13421342; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000 
1343- ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0) 
1344- ; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] 
13451343; GFX90A-NEXT:    buffer_wbl2 
13461344; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] 
13471345; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) 
13481346; GFX90A-NEXT:    buffer_invl2 
13491347; GFX90A-NEXT:    buffer_wbinvl1_vol 
1350- ; GFX90A-NEXT:    s_endpgm  
1348+ ; GFX90A-NEXT:    s_setpc_b64 s[30:31]  
13511349; 
13521350; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat: 
13531351; GFX942:       ; %bb.0: ; %main_body 
1354- ; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24  
1352+ ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  
13551353; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0 
1356- ; GFX942-NEXT:    s_waitcnt lgkmcnt(0) 
1357- ; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1] 
13581354; GFX942-NEXT:    buffer_wbl2 sc0 sc1 
13591355; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] sc1 
13601356; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) 
13611357; GFX942-NEXT:    buffer_inv sc0 sc1 
1362- ; GFX942-NEXT:    s_endpgm  
1358+ ; GFX942-NEXT:    s_setpc_b64 s[30:31]  
13631359main_body:
13641360  %ret  = atomicrmw  fadd  ptr  %ptr , double  4 .0  seq_cst , !noalias.addrspace  !1 , !amdgpu.no.fine.grained.memory  !0 
13651361  ret  void 
13661362}
13671363
; Test: flat_atomic_fadd_f64_noret_pat_agent
; IR under test: a seq_cst `atomicrmw fadd` of double 4.0 through %ptr at
; syncscope("agent"). %ret is never used; only `ret void` follows it. The
; instruction carries !noalias.addrspace and !amdgpu.no.fine.grained.memory
; metadata (the definitions of !0 and !1 are not visible in this excerpt).
; Diff shown here: the `-` lines are the previous `amdgpu_kernel` form, whose
; checks loaded s[0:1] with s_load_dwordx2, copied it into v[0:1] and ended
; with s_endpgm. The `+` lines drop `amdgpu_kernel`; their checks open with a
; full s_waitcnt, use v[0:1] without that copy and end with
; s_setpc_b64 s[30:31].
; Unchanged in both forms: the GFX90A checks have no write-back before the
; atomic and only buffer_wbinvl1_vol after it; the GFX942 checks use
; buffer_wbl2 sc1 before, no modifier on the atomic, and buffer_inv sc1 after.
1368- define  amdgpu_kernel  void  @flat_atomic_fadd_f64_noret_pat_agent (ptr  %ptr ) #1  {
1364+ define  void  @flat_atomic_fadd_f64_noret_pat_agent (ptr  %ptr ) #1  {
13691365; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent: 
13701366; GFX90A:       ; %bb.0: ; %main_body 
1371- ; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24  
1367+ ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  
13721368; GFX90A-NEXT:    v_mov_b32_e32 v2, 0 
13731369; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000 
1374- ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0) 
1375- ; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] 
13761370; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] 
13771371; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) 
13781372; GFX90A-NEXT:    buffer_wbinvl1_vol 
1379- ; GFX90A-NEXT:    s_endpgm  
1373+ ; GFX90A-NEXT:    s_setpc_b64 s[30:31]  
13801374; 
13811375; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent: 
13821376; GFX942:       ; %bb.0: ; %main_body 
1383- ; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24  
1377+ ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  
13841378; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0 
1385- ; GFX942-NEXT:    s_waitcnt lgkmcnt(0) 
1386- ; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1] 
13871379; GFX942-NEXT:    buffer_wbl2 sc1 
13881380; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] 
13891381; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) 
13901382; GFX942-NEXT:    buffer_inv sc1 
1391- ; GFX942-NEXT:    s_endpgm  
1383+ ; GFX942-NEXT:    s_setpc_b64 s[30:31]  
13921384main_body:
13931385  %ret  = atomicrmw  fadd  ptr  %ptr , double  4 .0  syncscope("agent" ) seq_cst , !noalias.addrspace  !1 , !amdgpu.no.fine.grained.memory  !0 
13941386  ret  void 
13951387}
13961388
1397- define  amdgpu_kernel  void  @flat_atomic_fadd_f64_noret_pat_system (ptr  %ptr ) #1  {
1389+ define  void  @flat_atomic_fadd_f64_noret_pat_system (ptr  %ptr ) #1  {
13981390; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system: 
13991391; GFX90A:       ; %bb.0: ; %main_body 
1400- ; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24  
1392+ ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  
14011393; GFX90A-NEXT:    v_mov_b32_e32 v2, 0 
14021394; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000 
1403- ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0) 
1404- ; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] 
14051395; GFX90A-NEXT:    buffer_wbl2 
14061396; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] 
14071397; GFX90A-NEXT:    s_waitcnt vmcnt(0) 
14081398; GFX90A-NEXT:    buffer_invl2 
14091399; GFX90A-NEXT:    buffer_wbinvl1_vol 
1410- ; GFX90A-NEXT:    s_endpgm 
1400+ ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0) 
1401+ ; GFX90A-NEXT:    s_setpc_b64 s[30:31] 
14111402; 
14121403; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_system: 
14131404; GFX942:       ; %bb.0: ; %main_body 
1414- ; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24  
1405+ ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  
14151406; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0 
1416- ; GFX942-NEXT:    s_waitcnt lgkmcnt(0) 
1417- ; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1] 
14181407; GFX942-NEXT:    buffer_wbl2 sc0 sc1 
14191408; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] sc1 
14201409; GFX942-NEXT:    s_waitcnt vmcnt(0) 
14211410; GFX942-NEXT:    buffer_inv sc0 sc1 
1422- ; GFX942-NEXT:    s_endpgm 
1411+ ; GFX942-NEXT:    s_waitcnt lgkmcnt(0) 
1412+ ; GFX942-NEXT:    s_setpc_b64 s[30:31] 
14231413main_body:
14241414  %ret  = atomicrmw  fadd  ptr  %ptr , double  4 .0  syncscope("one-as" ) seq_cst , !noalias.addrspace  !1 , !amdgpu.no.fine.grained.memory  !0 
14251415  ret  void 
@@ -1506,30 +1496,26 @@ main_body:
15061496  ret  double  %ret 
15071497}
15081498
1509- define  amdgpu_kernel  void  @flat_atomic_fadd_f64_noret_pat_agent_safe (ptr  %ptr ) {
1499+ define  void  @flat_atomic_fadd_f64_noret_pat_agent_safe (ptr  %ptr ) {
15101500; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: 
15111501; GFX90A:       ; %bb.0: ; %main_body 
1512- ; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24  
1502+ ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  
15131503; GFX90A-NEXT:    v_mov_b32_e32 v2, 0 
15141504; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000 
1515- ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0) 
1516- ; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] 
15171505; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] 
15181506; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) 
15191507; GFX90A-NEXT:    buffer_wbinvl1_vol 
1520- ; GFX90A-NEXT:    s_endpgm  
1508+ ; GFX90A-NEXT:    s_setpc_b64 s[30:31]  
15211509; 
15221510; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: 
15231511; GFX942:       ; %bb.0: ; %main_body 
1524- ; GFX942-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24  
1512+ ; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)  
15251513; GFX942-NEXT:    v_mov_b64_e32 v[2:3], 4.0 
1526- ; GFX942-NEXT:    s_waitcnt lgkmcnt(0) 
1527- ; GFX942-NEXT:    v_mov_b64_e32 v[0:1], s[0:1] 
15281514; GFX942-NEXT:    buffer_wbl2 sc1 
15291515; GFX942-NEXT:    flat_atomic_add_f64 v[0:1], v[2:3] 
15301516; GFX942-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) 
15311517; GFX942-NEXT:    buffer_inv sc1 
1532- ; GFX942-NEXT:    s_endpgm  
1518+ ; GFX942-NEXT:    s_setpc_b64 s[30:31]  
15331519main_body:
15341520  %ret  = atomicrmw  fadd  ptr  %ptr , double  4 .0  syncscope("agent" ) seq_cst , !noalias.addrspace  !1 , !amdgpu.no.fine.grained.memory  !0 
15351521  ret  void 
0 commit comments