diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll index 51dc5ceb82b41..56edb29281944 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <32 x float> @bitcast_v32i32_to_v32f32(<32 x i32> %a, i32 %b) { ; GCN-LABEL: bitcast_v32i32_to_v32f32: @@ -92753,4954 +92753,6 @@ end: ret <64 x i16> %phi } -define <128 x i8> @bitcast_v64i16_to_v128i8(<64 x i16> %a, i32 %b) { -; GCN-LABEL: bitcast_v64i16_to_v128i8: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:576 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:580 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:584 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:588 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:592 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:596 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:600 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:604 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:608 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:612 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:616 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:628 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:644 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:656 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:132 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:648 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:124 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:120 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:660 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:116 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:636 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:108 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:104 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:652 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:100 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:96 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:620 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:92 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:88 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:632 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:84 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:624 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:76 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:72 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:640 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:12 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:136 -; GCN-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:68 -; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:64 -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:36 -; GCN-NEXT: s_waitcnt vmcnt(3) -; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15 -; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:60 -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 -; GCN-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:52 -; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:48 -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:44 -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 -; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:28 -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:24 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v6 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v10 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v14 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v18 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v22 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v26 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v30 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v29 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(10) expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v33 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v32 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v31 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v27 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:524 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v25 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Spill -; GCN-NEXT: v_lshlrev_b32_e32 v34, 16, v13 -; GCN-NEXT: v_lshlrev_b32_e32 v29, 16, v7 -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:20 -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:16 -; GCN-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:32 -; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 -; GCN-NEXT: buffer_load_dword v36, off, s[0:3], s32 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:780 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v8 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:776 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v12 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:772 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v16 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:768 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v20 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:764 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v24 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:760 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v28 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:756 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(8) expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v31 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:752 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v7 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:748 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v9 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:744 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v49 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:728 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v48 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:736 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v39 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:740 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v11 -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:732 ; 4-byte Folded Spill -; GCN-NEXT: v_lshlrev_b32_e32 v27, 16, v38 -; GCN-NEXT: v_lshlrev_b32_e32 v25, 16, v37 -; GCN-NEXT: ; implicit-def: $vgpr40 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr54 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr50 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr51 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr52 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr53 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr55 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr41 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr42 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr43 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr44 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr45 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr46 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr47 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr56 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr57 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr58 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr59 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr60 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr61 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr62 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr63 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: ; kill: killed $vgpr5 -; GCN-NEXT: ; implicit-def: $vgpr6 -; GCN-NEXT: ; kill: killed $vgpr6 -; GCN-NEXT: ; implicit-def: $vgpr6 -; GCN-NEXT: ; kill: killed $vgpr6 -; GCN-NEXT: ; implicit-def: $vgpr6 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: ; kill: killed $vgpr5 -; GCN-NEXT: ; implicit-def: $vgpr10 -; GCN-NEXT: ; kill: killed $vgpr10 -; GCN-NEXT: ; implicit-def: $vgpr10 -; GCN-NEXT: ; kill: killed $vgpr10 -; GCN-NEXT: ; implicit-def: $vgpr10 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: ; kill: killed $vgpr5 -; GCN-NEXT: ; implicit-def: $vgpr14 -; GCN-NEXT: ; kill: killed $vgpr14 -; GCN-NEXT: ; implicit-def: $vgpr14 -; GCN-NEXT: ; kill: killed $vgpr14 -; GCN-NEXT: ; implicit-def: $vgpr14 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: ; kill: killed $vgpr5 -; GCN-NEXT: ; implicit-def: $vgpr18 -; GCN-NEXT: ; kill: killed $vgpr18 -; GCN-NEXT: ; implicit-def: $vgpr18 -; GCN-NEXT: ; kill: killed $vgpr18 -; GCN-NEXT: ; implicit-def: $vgpr18 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: ; kill: killed $vgpr5 -; GCN-NEXT: ; implicit-def: $vgpr22 -; GCN-NEXT: ; kill: killed $vgpr22 -; GCN-NEXT: ; implicit-def: $vgpr22 -; GCN-NEXT: ; kill: killed $vgpr22 -; GCN-NEXT: ; implicit-def: $vgpr22 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: ; kill: killed $vgpr5 -; GCN-NEXT: ; implicit-def: $vgpr26 -; GCN-NEXT: ; kill: killed $vgpr26 -; GCN-NEXT: ; implicit-def: $vgpr26 -; GCN-NEXT: ; kill: killed $vgpr26 -; GCN-NEXT: ; implicit-def: $vgpr26 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: ; kill: killed $vgpr5 -; GCN-NEXT: ; implicit-def: $vgpr30 -; GCN-NEXT: ; kill: killed $vgpr30 -; GCN-NEXT: ; implicit-def: $vgpr30 -; GCN-NEXT: ; kill: killed $vgpr30 -; GCN-NEXT: ; implicit-def: $vgpr30 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: ; kill: killed $vgpr5 -; GCN-NEXT: ; implicit-def: $vgpr32 -; GCN-NEXT: ; kill: killed $vgpr32 -; GCN-NEXT: ; implicit-def: $vgpr32 -; GCN-NEXT: ; kill: killed $vgpr32 -; GCN-NEXT: ; implicit-def: $vgpr32 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: ; kill: killed $vgpr5 -; GCN-NEXT: ; implicit-def: $vgpr33 -; GCN-NEXT: ; kill: killed $vgpr33 -; GCN-NEXT: ; implicit-def: $vgpr33 -; GCN-NEXT: ; kill: killed $vgpr33 -; GCN-NEXT: ; implicit-def: $vgpr33 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: ; kill: killed $vgpr5 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: ; kill: killed $vgpr5 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: ; kill: killed $vgpr5 -; GCN-NEXT: ; implicit-def: $vgpr5 -; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB49_2 -; GCN-NEXT: ; %bb.1: ; %cmp.false -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v40, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:656 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v4 -; GCN-NEXT: v_or_b32_e32 v54, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:644 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v50, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:628 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v8 -; GCN-NEXT: v_or_b32_e32 v51, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:616 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v52, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:612 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v12 -; GCN-NEXT: v_or_b32_e32 v53, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:608 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v55, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:604 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v16 -; GCN-NEXT: v_or_b32_e32 v41, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:600 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v42, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:596 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v20 -; GCN-NEXT: v_or_b32_e32 v43, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:592 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v44, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:588 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v24 -; GCN-NEXT: v_or_b32_e32 v45, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:584 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v46, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:580 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v28 -; GCN-NEXT: v_or_b32_e32 v47, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:576 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v56, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v36 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v31 -; GCN-NEXT: v_or_b32_e32 v57, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v3 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v58, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v13 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v7 -; GCN-NEXT: v_or_b32_e32 v59, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v15 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v60, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v35 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 16, v9 -; GCN-NEXT: v_or_b32_e32 v61, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v17 -; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v21 -; GCN-NEXT: v_and_b32_e32 v3, 0xffff, v19 -; GCN-NEXT: v_and_b32_e32 v5, 0xffff, v23 -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:640 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v10, 0xffff, v6 -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:624 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v13, 0xffff, v6 -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:632 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v15, 0xffff, v6 -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:620 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v17, 0xffff, v6 -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:652 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v19, 0xffff, v6 -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:636 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v21, 0xffff, v6 -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:660 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v23, 0xffff, v6 -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:648 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v33, 0xffff, v6 -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:532 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v62, v1, v6 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v4 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:684 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v49 -; GCN-NEXT: v_or_b32_e32 v63, v2, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:664 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v2, v3, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v12 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:668 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v48 -; GCN-NEXT: v_or_b32_e32 v6, v5, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:672 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:524 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v10, v10, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v20 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:676 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v39 -; GCN-NEXT: v_or_b32_e32 v14, v13, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:680 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:528 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v18, v15, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v28 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v11 -; GCN-NEXT: v_or_b32_e32 v22, v17, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v31 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; GCN-NEXT: v_or_b32_e32 v26, v19, v34 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v7 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GCN-NEXT: v_or_b32_e32 v30, v21, v27 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v9 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; GCN-NEXT: v_or_b32_e32 v32, v23, v29 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v49 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; GCN-NEXT: v_or_b32_e32 v33, v33, v25 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v48 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v4, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:708 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v8, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v12, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v16, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v20, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v24, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v28, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v31, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v7, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v9, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v49, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v48, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v39 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v39, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v11 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v11, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v38 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v38, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v37 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_bfe_u32 v1, v37, 8, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill -; GCN-NEXT: v_alignbit_b32 v5, v54, v40, 24 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v54, v40, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:692 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v54, v40, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:700 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v51, v50, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v51, v50, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v51, v50, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:688 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v53, v52, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v53, v52, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v53, v52, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:696 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v41, v55, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v41, v55, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v41, v55, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:704 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v43, v42, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v43, v42, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v43, v42, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v45, v44, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v45, v44, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v45, v44, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:716 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v47, v46, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v47, v46, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v47, v46, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:720 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v57, v56, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v57, v56, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v57, v56, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v59, v58, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v59, v58, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v59, v58, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v61, v60, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v61, v60, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v61, v60, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v63, v62, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v63, v62, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v63, v62, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v6, v2, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v6, v2, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v6, v2, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v14, v10, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v14, v10, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v14, v10, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v22, v18, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v22, v18, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v22, v18, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v30, v26, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v30, v26, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v30, v26, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v33, v32, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v33, v32, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v33, v32, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:472 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v54 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v51 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:724 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v53 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v41 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v43 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:468 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v45 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:476 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v47 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v57 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v59 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:488 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v61 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:492 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v63 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:496 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v6 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:500 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v14 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:504 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v22 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:508 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v30 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:512 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v33 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:516 ; 4-byte Folded Spill -; GCN-NEXT: ; implicit-def: $vgpr1 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; kill: killed $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr36 -; GCN-NEXT: ; implicit-def: $vgpr3 -; GCN-NEXT: ; implicit-def: $vgpr13 -; GCN-NEXT: ; implicit-def: $vgpr15 -; GCN-NEXT: ; implicit-def: $vgpr35 -; GCN-NEXT: ; implicit-def: $vgpr17 -; GCN-NEXT: ; implicit-def: $vgpr21 -; GCN-NEXT: ; implicit-def: $vgpr19 -; GCN-NEXT: ; implicit-def: $vgpr23 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr4 -; GCN-NEXT: ; kill: killed $vgpr4 -; GCN-NEXT: ; implicit-def: $vgpr34 -; GCN-NEXT: ; implicit-def: $vgpr27 -; GCN-NEXT: ; implicit-def: $vgpr29 -; GCN-NEXT: ; implicit-def: $vgpr25 -; GCN-NEXT: .LBB49_2: ; %Flow -; GCN-NEXT: s_or_saveexec_b64 s[4:5], s[4:5] -; GCN-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:664 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:668 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:672 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:676 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:680 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:684 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:688 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:692 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:696 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:700 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:704 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:708 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:716 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:720 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:724 ; 4-byte Folded Reload -; GCN-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB49_4 -; GCN-NEXT: ; %bb.3: ; %cmp.true -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v2, vcc, 3, v2 -; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GCN-NEXT: v_or_b32_e32 v2, v29, v2 -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:648 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v4, vcc, 3, v4 -; GCN-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GCN-NEXT: v_or_b32_e32 v4, v25, v4 -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:652 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v6, vcc, 3, v5 -; GCN-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GCN-NEXT: v_or_b32_e32 v6, v34, v6 -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:636 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v7, vcc, 3, v5 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GCN-NEXT: v_or_b32_e32 v7, v27, v7 -; GCN-NEXT: s_mov_b32 s6, 0x30000 -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:632 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v8, vcc, 3, v5 -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:620 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v9, vcc, 3, v5 -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:640 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v10, vcc, 3, v5 -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:624 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v11, vcc, 3, v5 -; GCN-NEXT: v_add_i32_e32 v12, vcc, 3, v19 -; GCN-NEXT: v_add_i32_e32 v14, vcc, 3, v23 -; GCN-NEXT: v_add_i32_e32 v16, vcc, 3, v17 -; GCN-NEXT: v_add_i32_e32 v17, vcc, 3, v21 -; GCN-NEXT: v_add_i32_e32 v15, vcc, 3, v15 -; GCN-NEXT: v_add_i32_e32 v18, vcc, 3, v35 -; GCN-NEXT: v_add_i32_e32 v5, vcc, 3, v3 -; GCN-NEXT: v_add_i32_e32 v13, vcc, 3, v13 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:576 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v19, vcc, 3, v3 -; GCN-NEXT: v_add_i32_e32 v20, vcc, 3, v36 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:584 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v21, vcc, 3, v3 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:580 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v22, vcc, 3, v3 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:592 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v23, vcc, 3, v3 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:588 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v24, vcc, 3, v3 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:600 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v25, vcc, 3, v3 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:596 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v26, vcc, 3, v3 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:608 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v27, vcc, 3, v3 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:604 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v28, vcc, 3, v3 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:616 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v29, vcc, 3, v3 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:612 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v30, vcc, 3, v3 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:644 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v31, vcc, 3, v3 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:628 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v32, vcc, 3, v3 -; GCN-NEXT: v_add_i32_e32 v1, vcc, 3, v1 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:656 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v3, vcc, 3, v3 -; GCN-NEXT: v_and_b32_e32 v8, 0xffff, v8 -; GCN-NEXT: v_and_b32_e32 v9, 0xffff, v9 -; GCN-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GCN-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GCN-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GCN-NEXT: v_and_b32_e32 v14, 0xffff, v14 -; GCN-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GCN-NEXT: v_and_b32_e32 v17, 0xffff, v17 -; GCN-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GCN-NEXT: v_and_b32_e32 v18, 0xffff, v18 -; GCN-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GCN-NEXT: v_and_b32_e32 v13, 0xffff, v13 -; GCN-NEXT: v_and_b32_e32 v19, 0xffff, v19 -; GCN-NEXT: v_and_b32_e32 v20, 0xffff, v20 -; GCN-NEXT: v_and_b32_e32 v21, 0xffff, v21 -; GCN-NEXT: v_and_b32_e32 v22, 0xffff, v22 -; GCN-NEXT: v_and_b32_e32 v23, 0xffff, v23 -; GCN-NEXT: v_and_b32_e32 v24, 0xffff, v24 -; GCN-NEXT: v_and_b32_e32 v25, 0xffff, v25 -; GCN-NEXT: v_and_b32_e32 v26, 0xffff, v26 -; GCN-NEXT: v_and_b32_e32 v27, 0xffff, v27 -; GCN-NEXT: v_and_b32_e32 v28, 0xffff, v28 -; GCN-NEXT: v_and_b32_e32 v29, 0xffff, v29 -; GCN-NEXT: v_and_b32_e32 v30, 0xffff, v30 -; GCN-NEXT: v_and_b32_e32 v31, 0xffff, v31 -; GCN-NEXT: v_and_b32_e32 v32, 0xffff, v32 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:528 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v8, v33, v8 -; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:732 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v9, v33, v9 -; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:524 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v10, v33, v10 -; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:740 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v11, v33, v11 -; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v12, v33, v12 -; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:736 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v34, v33, v14 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:532 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v16, v14, v16 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:728 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v17, v14, v17 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:536 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v15, v14, v15 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:744 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v35, v14, v18 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:540 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v5, v14, v5 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:748 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v13, v14, v13 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:544 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v19, v14, v19 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:752 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v20, v14, v20 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:548 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v21, v14, v21 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:756 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v36, v14, v22 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:552 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v23, v14, v23 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:760 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v24, v14, v24 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:556 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v25, v14, v25 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:764 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v37, v14, v26 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:560 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v27, v14, v27 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:768 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v28, v14, v28 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:564 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v29, v14, v29 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:772 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v38, v14, v30 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:568 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v31, v14, v31 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:776 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v39, v14, v32 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:572 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v1, v14, v1 -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:780 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_or_b32_e32 v3, v14, v3 -; GCN-NEXT: v_add_i32_e32 v32, vcc, 0x30000, v2 -; GCN-NEXT: v_add_i32_e32 v33, vcc, s6, v4 -; GCN-NEXT: v_add_i32_e32 v26, vcc, s6, v6 -; GCN-NEXT: v_add_i32_e32 v30, vcc, s6, v7 -; GCN-NEXT: v_add_i32_e32 v18, vcc, s6, v8 -; GCN-NEXT: v_add_i32_e32 v22, vcc, s6, v9 -; GCN-NEXT: v_add_i32_e32 v10, vcc, s6, v10 -; GCN-NEXT: v_add_i32_e32 v14, vcc, s6, v11 -; GCN-NEXT: v_add_i32_e32 v2, vcc, s6, v12 -; GCN-NEXT: v_add_i32_e32 v6, vcc, s6, v34 -; GCN-NEXT: v_add_i32_e32 v62, vcc, s6, v16 -; GCN-NEXT: v_add_i32_e32 v63, vcc, s6, v17 -; GCN-NEXT: v_add_i32_e32 v60, vcc, s6, v15 -; GCN-NEXT: v_add_i32_e32 v61, vcc, s6, v35 -; GCN-NEXT: v_add_i32_e32 v58, vcc, s6, v5 -; GCN-NEXT: v_add_i32_e32 v59, vcc, s6, v13 -; GCN-NEXT: v_add_i32_e32 v56, vcc, s6, v19 -; GCN-NEXT: v_add_i32_e32 v57, vcc, s6, v20 -; GCN-NEXT: v_add_i32_e32 v46, vcc, s6, v21 -; GCN-NEXT: v_add_i32_e32 v47, vcc, s6, v36 -; GCN-NEXT: v_add_i32_e32 v44, vcc, s6, v23 -; GCN-NEXT: v_add_i32_e32 v45, vcc, s6, v24 -; GCN-NEXT: v_add_i32_e32 v42, vcc, s6, v25 -; GCN-NEXT: v_add_i32_e32 v43, vcc, s6, v37 -; GCN-NEXT: v_add_i32_e32 v55, vcc, s6, v27 -; GCN-NEXT: v_add_i32_e32 v41, vcc, s6, v28 -; GCN-NEXT: v_add_i32_e32 v52, vcc, s6, v29 -; GCN-NEXT: v_add_i32_e32 v53, vcc, s6, v38 -; GCN-NEXT: v_add_i32_e32 v50, vcc, s6, v31 -; GCN-NEXT: v_add_i32_e32 v51, vcc, s6, v39 -; GCN-NEXT: v_add_i32_e32 v40, vcc, s6, v1 -; GCN-NEXT: v_add_i32_e32 v54, vcc, s6, v3 -; GCN-NEXT: v_alignbit_b32 v5, v54, v40, 24 -; GCN-NEXT: v_alignbit_b32 v9, v54, v40, 16 -; GCN-NEXT: v_alignbit_b32 v4, v54, v40, 8 -; GCN-NEXT: v_alignbit_b32 v1, v51, v50, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v51, v50, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GCN-NEXT: v_alignbit_b32 v8, v51, v50, 8 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v53, v52, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v53, v52, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GCN-NEXT: v_alignbit_b32 v11, v53, v52, 8 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v41, v55, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v41, v55, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GCN-NEXT: v_alignbit_b32 v12, v41, v55, 8 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v43, v42, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v43, v42, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GCN-NEXT: v_alignbit_b32 v20, v43, v42, 8 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v45, v44, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v45, v44, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; GCN-NEXT: v_alignbit_b32 v24, v45, v44, 8 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v47, v46, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v47, v46, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GCN-NEXT: v_alignbit_b32 v28, v47, v46, 8 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v57, v56, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v57, v56, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v57, v56, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v59, v58, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v59, v58, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v59, v58, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v61, v60, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v61, v60, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v61, v60, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v63, v62, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v63, v62, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v63, v62, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v6, v2, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v6, v2, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v6, v2, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v14, v10, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v14, v10, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v14, v10, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v22, v18, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v22, v18, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v22, v18, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v30, v26, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v30, v26, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v30, v26, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v33, v32, 24 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v33, v32, 16 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_alignbit_b32 v1, v33, v32, 8 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:472 ; 4-byte Folded Spill -; GCN-NEXT: v_lshrrev_b32_e32 v16, 24, v54 -; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v54 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v54 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v51 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GCN-NEXT: v_lshrrev_b32_e32 v37, 16, v51 -; GCN-NEXT: v_lshrrev_b32_e32 v31, 8, v51 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v53 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; GCN-NEXT: v_lshrrev_b32_e32 v38, 16, v53 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v53 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v41 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GCN-NEXT: v_lshrrev_b32_e32 v39, 16, v41 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v41 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v43 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; GCN-NEXT: v_lshrrev_b32_e32 v48, 16, v43 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v43 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:468 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v45 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GCN-NEXT: v_lshrrev_b32_e32 v49, 16, v45 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v45 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:476 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v47 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v47 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v47 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v57 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v57 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v57 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v59 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v59 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v59 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:488 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v61 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v61 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v61 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:492 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v63 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v63 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v63 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:496 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v6 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v6 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v6 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:500 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v14 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v14 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v14 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:504 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v22 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v22 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v22 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:508 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v30 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v30 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v30 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:512 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 24, v33 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v33 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshrrev_b32_e32 v1, 8, v33 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:516 ; 4-byte Folded Spill -; GCN-NEXT: .LBB49_4: ; %end -; GCN-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v40 -; GCN-NEXT: s_waitcnt vmcnt(6) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v4 -; GCN-NEXT: v_or_b32_e32 v1, v1, v3 -; GCN-NEXT: v_and_b32_e32 v3, 0xff, v54 -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v4, 8, v4 -; GCN-NEXT: v_or_b32_e32 v3, v3, v4 -; GCN-NEXT: v_and_b32_e32 v4, 0xff, v9 -; GCN-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GCN-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GCN-NEXT: v_or_b32_e32 v4, v5, v4 -; GCN-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GCN-NEXT: v_lshlrev_b32_e32 v7, 24, v16 -; GCN-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GCN-NEXT: v_or_b32_e32 v5, v7, v5 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GCN-NEXT: v_or_b32_e32 v1, v1, v4 -; GCN-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GCN-NEXT: v_or_b32_e32 v3, v3, v5 -; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_add_i32_e32 v1, vcc, 4, v0 -; GCN-NEXT: buffer_store_dword v3, v1, s[0:3], 0 offen -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v50 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v8 -; GCN-NEXT: v_or_b32_e32 v29, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v51 -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v31 -; GCN-NEXT: v_or_b32_e32 v31, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v52 -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v11 -; GCN-NEXT: v_or_b32_e32 v1, v1, v3 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v53 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v1, v1, v3 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v55 -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v12 -; GCN-NEXT: v_or_b32_e32 v1, v1, v3 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:520 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v41 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v1, v1, v3 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v42 -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v20 -; GCN-NEXT: v_or_b32_e32 v7, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v43 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:468 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v8, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v44 -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v24 -; GCN-NEXT: v_or_b32_e32 v9, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v45 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:476 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v11, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v46 -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v28 -; GCN-NEXT: v_or_b32_e32 v12, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v47 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:480 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v13, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v56 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v15, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v57 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:484 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v16, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v58 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v17, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v59 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:488 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v19, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v60 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v20, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v61 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:492 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v21, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v62 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v23, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v63 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:496 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GCN-NEXT: v_or_b32_e32 v24, v1, v3 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GCN-NEXT: v_or_b32_e32 v1, v1, v2 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v6 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GCN-NEXT: v_or_b32_e32 v1, v1, v2 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v10 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GCN-NEXT: v_or_b32_e32 v10, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v14 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GCN-NEXT: v_or_b32_e32 v14, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v18 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GCN-NEXT: v_or_b32_e32 v18, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v22 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GCN-NEXT: v_or_b32_e32 v22, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v26 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GCN-NEXT: v_or_b32_e32 v25, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v30 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:512 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GCN-NEXT: v_or_b32_e32 v26, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v32 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GCN-NEXT: v_or_b32_e32 v27, v1, v2 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v33 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; GCN-NEXT: v_or_b32_e32 v28, v1, v2 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v30, v2, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v37 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v33, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v32, v2, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v38 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v34, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v35, v2, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v39 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v36, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v37, v2, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v48 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v38, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v39, v2, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v49 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v48, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v49, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v50, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v51, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v52, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v53, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v54, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v55, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v40, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v41, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v42, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v43, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v44, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v45, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v46, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v47, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v56, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v57, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v58, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v59, v2, v1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GCN-NEXT: v_or_b32_e32 v60, v2, v1 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v29 -; GCN-NEXT: v_or_b32_e32 v4, v1, v30 -; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v31 -; GCN-NEXT: v_or_b32_e32 v5, v1, v33 -; GCN-NEXT: v_add_i32_e32 v1, vcc, 8, v0 -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GCN-NEXT: v_or_b32_e32 v63, v2, v32 -; GCN-NEXT: v_add_i32_e32 v2, vcc, 12, v0 -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GCN-NEXT: v_or_b32_e32 v61, v3, v34 -; GCN-NEXT: v_add_i32_e32 v3, vcc, 16, v0 -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GCN-NEXT: v_or_b32_e32 v6, v6, v35 -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GCN-NEXT: v_add_i32_e32 v62, vcc, 20, v0 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GCN-NEXT: v_or_b32_e32 v6, v6, v36 -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_add_i32_e32 v6, vcc, 24, v0 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GCN-NEXT: v_or_b32_e32 v7, v7, v37 -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GCN-NEXT: v_add_i32_e32 v29, vcc, 28, v0 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v8 -; GCN-NEXT: v_or_b32_e32 v7, v7, v38 -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; GCN-NEXT: v_add_i32_e32 v30, vcc, 32, v0 -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v9 -; GCN-NEXT: v_or_b32_e32 v8, v7, v39 -; GCN-NEXT: v_add_i32_e32 v31, vcc, 36, v0 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v11 -; GCN-NEXT: v_or_b32_e32 v9, v7, v48 -; GCN-NEXT: v_add_i32_e32 v32, vcc, 40, v0 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v12 -; GCN-NEXT: v_or_b32_e32 v11, v7, v49 -; GCN-NEXT: v_add_i32_e32 v33, vcc, 44, v0 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v13 -; GCN-NEXT: v_or_b32_e32 v12, v7, v50 -; GCN-NEXT: v_add_i32_e32 v34, vcc, 48, v0 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v15 -; GCN-NEXT: v_or_b32_e32 v13, v7, v51 -; GCN-NEXT: v_add_i32_e32 v35, vcc, 52, v0 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v16 -; GCN-NEXT: v_or_b32_e32 v15, v7, v52 -; GCN-NEXT: v_add_i32_e32 v36, vcc, 56, v0 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v17 -; GCN-NEXT: v_or_b32_e32 v16, v7, v53 -; GCN-NEXT: v_add_i32_e32 v37, vcc, 60, v0 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v19 -; GCN-NEXT: v_or_b32_e32 v17, v7, v54 -; GCN-NEXT: v_add_i32_e32 v38, vcc, 64, v0 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v20 -; GCN-NEXT: v_or_b32_e32 v19, v7, v55 -; GCN-NEXT: v_add_i32_e32 v39, vcc, 0x44, v0 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v21 -; GCN-NEXT: v_or_b32_e32 v20, v7, v40 -; GCN-NEXT: v_add_i32_e32 v48, vcc, 0x48, v0 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v23 -; GCN-NEXT: v_or_b32_e32 v21, v7, v41 -; GCN-NEXT: v_add_i32_e32 v49, vcc, 0x4c, v0 -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v24 -; GCN-NEXT: v_or_b32_e32 v23, v7, v42 -; GCN-NEXT: v_add_i32_e32 v24, vcc, 0x50, v0 -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GCN-NEXT: v_or_b32_e32 v7, v7, v43 -; GCN-NEXT: v_add_i32_e32 v50, vcc, 0x54, v0 -; GCN-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v51, 0xffff, v51 -; GCN-NEXT: v_or_b32_e32 v44, v51, v44 -; GCN-NEXT: v_add_i32_e32 v51, vcc, 0x58, v0 -; GCN-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GCN-NEXT: v_or_b32_e32 v10, v10, v45 -; GCN-NEXT: v_add_i32_e32 v52, vcc, 0x5c, v0 -; GCN-NEXT: v_and_b32_e32 v14, 0xffff, v14 -; GCN-NEXT: v_or_b32_e32 v14, v14, v46 -; GCN-NEXT: v_add_i32_e32 v53, vcc, 0x60, v0 -; GCN-NEXT: v_and_b32_e32 v18, 0xffff, v18 -; GCN-NEXT: v_or_b32_e32 v18, v18, v47 -; GCN-NEXT: v_add_i32_e32 v54, vcc, 0x64, v0 -; GCN-NEXT: v_and_b32_e32 v22, 0xffff, v22 -; GCN-NEXT: v_or_b32_e32 v22, v22, v56 -; GCN-NEXT: v_add_i32_e32 v55, vcc, 0x68, v0 -; GCN-NEXT: v_and_b32_e32 v25, 0xffff, v25 -; GCN-NEXT: v_or_b32_e32 v25, v25, v57 -; GCN-NEXT: v_add_i32_e32 v40, vcc, 0x6c, v0 -; GCN-NEXT: v_and_b32_e32 v26, 0xffff, v26 -; GCN-NEXT: v_or_b32_e32 v26, v26, v58 -; GCN-NEXT: v_add_i32_e32 v41, vcc, 0x70, v0 -; GCN-NEXT: v_and_b32_e32 v27, 0xffff, v27 -; GCN-NEXT: v_or_b32_e32 v27, v27, v59 -; GCN-NEXT: v_add_i32_e32 v42, vcc, 0x74, v0 -; GCN-NEXT: v_and_b32_e32 v28, 0xffff, v28 -; GCN-NEXT: v_or_b32_e32 v28, v28, v60 -; GCN-NEXT: v_add_i32_e32 v43, vcc, 0x78, v0 -; GCN-NEXT: v_add_i32_e32 v0, vcc, 0x7c, v0 -; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v5, v2, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v63, v3, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v61, v62, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v1, v6, s[0:3], 0 offen -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v1, v29, s[0:3], 0 offen -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v1, v30, s[0:3], 0 offen -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v1, v31, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v8, v32, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v9, v33, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v11, v34, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v12, v35, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v13, v36, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v15, v37, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v16, v38, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v17, v39, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v19, v48, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v20, v49, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v21, v24, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v23, v50, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v7, v51, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v44, v52, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v10, v53, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v14, v54, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v18, v55, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v22, v40, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v25, v41, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v26, v42, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v27, v43, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: bitcast_v64i16_to_v128i8: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:8 -; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:4 -; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v15 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v13 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v11 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v9 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v7 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v5 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v3 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v1 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v29 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v28 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v27 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v26 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v25 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v24 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v23 -; VI-NEXT: v_lshrrev_b32_e32 v60, 16, v16 -; VI-NEXT: v_lshrrev_b32_e32 v33, 16, v14 -; VI-NEXT: v_lshrrev_b32_e32 v63, 16, v12 -; VI-NEXT: v_lshrrev_b32_e32 v59, 16, v10 -; VI-NEXT: v_lshrrev_b32_e32 v34, 16, v8 -; VI-NEXT: v_lshrrev_b32_e32 v47, 16, v6 -; VI-NEXT: v_lshrrev_b32_e32 v56, 16, v4 -; VI-NEXT: v_lshrrev_b32_e32 v57, 16, v2 -; VI-NEXT: v_lshrrev_b32_e32 v46, 16, v30 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v32, 16, v22 -; VI-NEXT: v_lshrrev_b32_e32 v49, 16, v21 -; VI-NEXT: v_lshrrev_b32_e32 v53, 16, v20 -; VI-NEXT: v_lshrrev_b32_e32 v51, 16, v19 -; VI-NEXT: v_lshrrev_b32_e32 v52, 16, v18 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; VI-NEXT: s_waitcnt vmcnt(14) -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v31 -; VI-NEXT: v_lshrrev_b32_e32 v31, 16, v37 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v50, 16, v36 -; VI-NEXT: v_lshrrev_b32_e32 v31, 16, v17 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr32 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr55 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; kill: killed $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr62 -; VI-NEXT: ; implicit-def: $vgpr31 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr38 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr42 -; VI-NEXT: ; implicit-def: $vgpr61 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr58 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr39 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; kill: killed $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr35 -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: ; kill: killed $vgpr54 -; VI-NEXT: ; implicit-def: $vgpr48 -; VI-NEXT: ; kill: killed $vgpr48 -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: ; kill: killed $vgpr54 -; VI-NEXT: ; implicit-def: $vgpr48 -; VI-NEXT: ; kill: killed $vgpr48 -; VI-NEXT: ; implicit-def: $vgpr48 -; VI-NEXT: ; kill: killed $vgpr48 -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: ; kill: killed $vgpr54 -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: ; kill: killed $vgpr54 -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: ; kill: killed $vgpr54 -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: ; kill: killed $vgpr54 -; VI-NEXT: ; implicit-def: $vgpr48 -; VI-NEXT: ; kill: killed $vgpr48 -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: ; kill: killed $vgpr54 -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: ; kill: killed $vgpr54 -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: ; kill: killed $vgpr54 -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: ; kill: killed $vgpr54 -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: ; kill: killed $vgpr54 -; VI-NEXT: ; implicit-def: $vgpr43 -; VI-NEXT: ; implicit-def: $vgpr40 -; VI-NEXT: ; implicit-def: $vgpr48 -; VI-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; VI-NEXT: ; implicit-def: $vgpr48 -; VI-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; VI-NEXT: ; implicit-def: $vgpr45 -; VI-NEXT: ; implicit-def: $vgpr44 -; VI-NEXT: ; implicit-def: $vgpr41 -; VI-NEXT: ; implicit-def: $vgpr54 -; VI-NEXT: ; implicit-def: $vgpr48 -; VI-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; VI-NEXT: ; implicit-def: $vgpr48 -; VI-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; VI-NEXT: ; implicit-def: $vgpr48 -; VI-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; VI-NEXT: ; implicit-def: $vgpr48 -; VI-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc -; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; VI-NEXT: s_cbranch_execz .LBB49_2 -; VI-NEXT: ; %bb.1: ; %cmp.false -; VI-NEXT: v_lshrrev_b32_e32 v31, 24, v16 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v16 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v15 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 24, v14 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v14 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v13 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 24, v12 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v12 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v11 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 24, v10 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v10 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v9 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 24, v8 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v8 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v7 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 24, v6 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v6 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v5 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b64 v[31:32], 24, v[15:16] -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b64 v[31:32], 24, v[13:14] -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b64 v[31:32], 24, v[11:12] -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b64 v[31:32], 24, v[9:10] -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v31, v7 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v9, v10 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v9, v11 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v9, v12 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v9, v13 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v9, v14 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v9, v16 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v9, v8 -; VI-NEXT: v_lshrrev_b64 v[7:8], 24, v[7:8] -; VI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v7, v5 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v7, v6 -; VI-NEXT: v_lshrrev_b64 v[5:6], 24, v[5:6] -; VI-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v5, 24, v4 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:476 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v5, 8, v4 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v5, 8, v3 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v5, v3 -; VI-NEXT: v_lshrrev_b64 v[40:41], 24, v[3:4] -; VI-NEXT: v_lshrrev_b32_e32 v3, 24, v2 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:492 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v3, 8, v2 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v3, v1 -; VI-NEXT: v_lshrrev_b64 v[43:44], 24, v[1:2] -; VI-NEXT: v_lshrrev_b32_e32 v1, 24, v37 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v37 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v36 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v36 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v37 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v3, v2 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b64 v[1:2], 24, v[36:37] -; VI-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v5, v4 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 24, v30 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v30 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v29 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v29 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v30 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b64 v[1:2], 24, v[29:30] -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 24, v28 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:496 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v28 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v27 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v27 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v28 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b64 v[1:2], 24, v[27:28] -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v26 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:468 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v25 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:472 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v25 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v26 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b64 v[1:2], 24, v[25:26] -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v24 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v23 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:488 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v23 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v24 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v22 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:500 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v21 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:504 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v21 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v22 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v19 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v20 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v17 -; VI-NEXT: v_lshrrev_b64 v[44:45], 24, v[19:20] -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v18 -; VI-NEXT: v_lshrrev_b64 v[54:55], 24, v[23:24] -; VI-NEXT: v_lshrrev_b64 v[41:42], 24, v[21:22] -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v1, v46 -; VI-NEXT: v_lshrrev_b64 v[45:46], 24, v[17:18] -; VI-NEXT: v_mov_b32_e32 v32, v15 -; VI-NEXT: v_lshrrev_b32_e32 v35, 24, v26 -; VI-NEXT: v_lshrrev_b32_e32 v39, 24, v24 -; VI-NEXT: v_lshrrev_b32_e32 v58, 24, v22 -; VI-NEXT: v_lshrrev_b32_e32 v61, 24, v20 -; VI-NEXT: v_lshrrev_b32_e32 v42, 8, v20 -; VI-NEXT: v_lshrrev_b32_e32 v38, 8, v19 -; VI-NEXT: v_lshrrev_b32_e32 v31, 24, v18 -; VI-NEXT: v_lshrrev_b32_e32 v62, 8, v18 -; VI-NEXT: v_lshrrev_b32_e32 v55, 8, v17 -; VI-NEXT: v_mov_b32_e32 v46, v1 -; VI-NEXT: ; implicit-def: $vgpr1 -; VI-NEXT: ; implicit-def: $vgpr3 -; VI-NEXT: ; implicit-def: $vgpr5 -; VI-NEXT: ; implicit-def: $vgpr7 -; VI-NEXT: ; implicit-def: $vgpr9 -; VI-NEXT: ; implicit-def: $vgpr11 -; VI-NEXT: ; implicit-def: $vgpr13 -; VI-NEXT: ; implicit-def: $vgpr15 -; VI-NEXT: ; implicit-def: $vgpr17 -; VI-NEXT: ; implicit-def: $vgpr19 -; VI-NEXT: ; implicit-def: $vgpr21 -; VI-NEXT: ; implicit-def: $vgpr23 -; VI-NEXT: ; implicit-def: $vgpr25 -; VI-NEXT: ; implicit-def: $vgpr27 -; VI-NEXT: ; implicit-def: $vgpr29 -; VI-NEXT: ; implicit-def: $vgpr37 -; VI-NEXT: .LBB49_2: ; %Flow -; VI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; VI-NEXT: s_cbranch_execz .LBB49_4 -; VI-NEXT: ; %bb.3: ; %cmp.true -; VI-NEXT: v_mov_b32_e32 v31, 3 -; VI-NEXT: v_add_u16_sdwa v51, v18, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_add_u16_e32 v32, 3, v18 -; VI-NEXT: v_lshlrev_b32_e32 v18, 16, v51 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v18, v32, v18 -; VI-NEXT: v_add_u16_e32 v32, 3, v17 -; VI-NEXT: v_add_u16_sdwa v17, v17, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; VI-NEXT: v_lshlrev_b32_e32 v17, 16, v17 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v17, v32, v17 -; VI-NEXT: v_add_u16_e32 v32, 3, v20 -; VI-NEXT: v_add_u16_sdwa v20, v20, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:512 ; 4-byte Folded Spill -; VI-NEXT: v_lshlrev_b32_e32 v20, 16, v20 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v20, v32, v20 -; VI-NEXT: v_add_u16_e32 v32, 3, v19 -; VI-NEXT: v_add_u16_sdwa v19, v19, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:508 ; 4-byte Folded Spill -; VI-NEXT: v_lshlrev_b32_e32 v19, 16, v19 -; VI-NEXT: v_add_u16_sdwa v48, v22, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v19, v32, v19 -; VI-NEXT: v_add_u16_e32 v32, 3, v22 -; VI-NEXT: v_lshlrev_b32_e32 v22, 16, v48 -; VI-NEXT: v_add_u16_sdwa v53, v21, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v22, v32, v22 -; VI-NEXT: v_add_u16_e32 v32, 3, v21 -; VI-NEXT: v_lshlrev_b32_e32 v21, 16, v53 -; VI-NEXT: v_add_u16_sdwa v61, v24, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v21, v32, v21 -; VI-NEXT: v_add_u16_e32 v32, 3, v24 -; VI-NEXT: v_lshlrev_b32_e32 v24, 16, v61 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v24, v32, v24 -; VI-NEXT: v_add_u16_e32 v32, 3, v23 -; VI-NEXT: v_add_u16_sdwa v23, v23, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; VI-NEXT: v_lshlrev_b32_e32 v23, 16, v23 -; VI-NEXT: v_add_u16_sdwa v58, v26, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v23, v32, v23 -; VI-NEXT: v_add_u16_e32 v32, 3, v26 -; VI-NEXT: v_lshlrev_b32_e32 v26, 16, v58 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v26, v32, v26 -; VI-NEXT: v_add_u16_e32 v32, 3, v25 -; VI-NEXT: v_add_u16_sdwa v25, v25, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; VI-NEXT: v_lshlrev_b32_e32 v25, 16, v25 -; VI-NEXT: v_add_u16_sdwa v39, v28, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v25, v32, v25 -; VI-NEXT: v_add_u16_e32 v32, 3, v28 -; VI-NEXT: v_lshlrev_b32_e32 v28, 16, v39 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v28, v32, v28 -; VI-NEXT: v_add_u16_e32 v32, 3, v27 -; VI-NEXT: v_add_u16_sdwa v27, v27, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; VI-NEXT: v_lshlrev_b32_e32 v27, 16, v27 -; VI-NEXT: v_add_u16_sdwa v35, v30, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v27, v32, v27 -; VI-NEXT: v_add_u16_e32 v33, 3, v30 -; VI-NEXT: v_add_u16_e32 v34, 3, v29 -; VI-NEXT: v_add_u16_sdwa v32, v29, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b32_e32 v29, 16, v35 -; VI-NEXT: v_add_u16_sdwa v52, v37, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v30, v33, v29 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; VI-NEXT: v_lshlrev_b32_e32 v29, 16, v32 -; VI-NEXT: v_add_u16_e32 v33, 3, v37 -; VI-NEXT: v_add_u16_sdwa v50, v36, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b32_e32 v32, 16, v52 -; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v29, v34, v29 -; VI-NEXT: v_add_u16_e32 v34, 3, v36 -; VI-NEXT: v_or_b32_e32 v37, v33, v32 -; VI-NEXT: v_lshlrev_b32_e32 v32, 16, v50 -; VI-NEXT: v_add_u16_sdwa v57, v2, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v36, v34, v32 -; VI-NEXT: v_add_u16_e32 v33, 3, v2 -; VI-NEXT: v_add_u16_e32 v34, 3, v1 -; VI-NEXT: v_add_u16_sdwa v32, v1, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v57 -; VI-NEXT: v_or_b32_e32 v2, v33, v1 -; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v32 -; VI-NEXT: v_add_u16_sdwa v56, v4, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v1, v34, v1 -; VI-NEXT: v_add_u16_e32 v33, 3, v4 -; VI-NEXT: v_add_u16_e32 v34, 3, v3 -; VI-NEXT: v_add_u16_sdwa v32, v3, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b32_e32 v3, 16, v56 -; VI-NEXT: v_or_b32_e32 v4, v33, v3 -; VI-NEXT: v_lshlrev_b32_e32 v3, 16, v32 -; VI-NEXT: v_add_u16_sdwa v47, v6, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v3, v34, v3 -; VI-NEXT: v_add_u16_e32 v33, 3, v6 -; VI-NEXT: v_add_u16_e32 v34, 3, v5 -; VI-NEXT: v_add_u16_sdwa v32, v5, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b32_e32 v5, 16, v47 -; VI-NEXT: v_or_b32_e32 v6, v33, v5 -; VI-NEXT: v_lshlrev_b32_e32 v5, 16, v32 -; VI-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v5, v34, v5 -; VI-NEXT: v_add_u16_sdwa v34, v8, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; VI-NEXT: v_add_u16_e32 v38, 3, v8 -; VI-NEXT: v_add_u16_e32 v33, 3, v7 -; VI-NEXT: v_add_u16_sdwa v32, v7, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b32_e32 v7, 16, v34 -; VI-NEXT: v_or_b32_e32 v8, v38, v7 -; VI-NEXT: v_lshlrev_b32_e32 v7, 16, v32 -; VI-NEXT: v_add_u16_sdwa v59, v10, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v38, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v7, v33, v7 -; VI-NEXT: v_add_u16_e32 v33, 3, v10 -; VI-NEXT: v_add_u16_e32 v38, 3, v9 -; VI-NEXT: v_add_u16_sdwa v32, v9, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b32_e32 v9, 16, v59 -; VI-NEXT: v_or_b32_e32 v10, v33, v9 -; VI-NEXT: v_lshlrev_b32_e32 v9, 16, v32 -; VI-NEXT: v_add_u16_sdwa v63, v12, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v38, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v9, v38, v9 -; VI-NEXT: v_add_u16_e32 v33, 3, v12 -; VI-NEXT: v_add_u16_e32 v38, 3, v11 -; VI-NEXT: v_add_u16_sdwa v32, v11, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b32_e32 v11, 16, v63 -; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v12, v33, v11 -; VI-NEXT: v_lshlrev_b32_e32 v11, 16, v32 -; VI-NEXT: v_add_u16_sdwa v33, v14, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v38, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v11, v38, v11 -; VI-NEXT: v_add_u16_e32 v38, 3, v14 -; VI-NEXT: v_add_u16_e32 v49, 3, v13 -; VI-NEXT: v_add_u16_sdwa v32, v13, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b32_e32 v13, 16, v33 -; VI-NEXT: v_add_u16_sdwa v60, v16, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_or_b32_e32 v14, v38, v13 -; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill -; VI-NEXT: v_lshlrev_b32_e32 v13, 16, v32 -; VI-NEXT: v_add_u16_sdwa v31, v15, v31 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_add_u16_e32 v16, 3, v16 -; VI-NEXT: v_add_u16_e32 v32, 3, v15 -; VI-NEXT: v_lshlrev_b32_e32 v15, 16, v60 -; VI-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v16, v16, v15 -; VI-NEXT: v_lshlrev_b32_e32 v15, 16, v31 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v15, v32, v15 -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v16 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v31, 8, v15 -; VI-NEXT: v_lshrrev_b64 v[15:16], 24, v[15:16] -; VI-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v38, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; VI-NEXT: v_or_b32_e32 v13, v49, v13 -; VI-NEXT: v_lshrrev_b32_e32 v15, 8, v14 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v15, 8, v13 -; VI-NEXT: v_lshrrev_b64 v[13:14], 24, v[13:14] -; VI-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v13, 8, v12 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v13, 8, v11 -; VI-NEXT: v_lshrrev_b64 v[11:12], 24, v[11:12] -; VI-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v11, 8, v10 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v11, 8, v9 -; VI-NEXT: v_lshrrev_b64 v[9:10], 24, v[9:10] -; VI-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v9, 8, v8 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v9, 8, v7 -; VI-NEXT: v_lshrrev_b64 v[7:8], 24, v[7:8] -; VI-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v7, 8, v6 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v7, 8, v5 -; VI-NEXT: v_lshrrev_b64 v[5:6], 24, v[5:6] -; VI-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v5, 8, v4 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v5, 8, v3 -; VI-NEXT: v_lshrrev_b64 v[40:41], 24, v[3:4] -; VI-NEXT: v_lshrrev_b32_e32 v3, 8, v2 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v3, 8, v1 -; VI-NEXT: v_lshrrev_b64 v[43:44], 24, v[1:2] -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v37 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v36 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b64 v[1:2], 24, v[36:37] -; VI-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v30 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v29 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b64 v[1:2], 24, v[29:30] -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v28 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v27 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b64 v[1:2], 24, v[27:28] -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v26 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:468 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v25 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:472 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b64 v[1:2], 24, v[25:26] -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v24 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v23 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:488 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v22 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:500 ; 4-byte Folded Spill -; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v21 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:504 ; 4-byte Folded Spill -; VI-NEXT: v_bfe_u32 v1, v60, 8, 8 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; VI-NEXT: v_bfe_u32 v1, v33, 8, 8 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; VI-NEXT: v_bfe_u32 v1, v63, 8, 8 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; VI-NEXT: v_bfe_u32 v1, v59, 8, 8 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill -; VI-NEXT: v_bfe_u32 v1, v34, 8, 8 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; VI-NEXT: v_bfe_u32 v1, v47, 8, 8 -; VI-NEXT: v_lshrrev_b64 v[44:45], 24, v[19:20] -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; VI-NEXT: v_bfe_u32 v1, v56, 8, 8 -; VI-NEXT: v_lshrrev_b64 v[45:46], 24, v[17:18] -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:476 ; 4-byte Folded Spill -; VI-NEXT: v_bfe_u32 v1, v57, 8, 8 -; VI-NEXT: v_mov_b32_e32 v46, v35 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:492 ; 4-byte Folded Spill -; VI-NEXT: v_bfe_u32 v1, v52, 8, 8 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill -; VI-NEXT: v_bfe_u32 v1, v46, 8, 8 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; VI-NEXT: v_bfe_u32 v1, v39, 8, 8 -; VI-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:496 ; 4-byte Folded Spill -; VI-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; VI-NEXT: v_mov_b32_e32 v49, v53 -; VI-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:512 ; 4-byte Folded Reload -; VI-NEXT: v_mov_b32_e32 v52, v51 -; VI-NEXT: v_bfe_u32 v31, v51, 8, 8 -; VI-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:508 ; 4-byte Folded Reload -; VI-NEXT: v_lshrrev_b64 v[54:55], 24, v[23:24] -; VI-NEXT: v_lshrrev_b64 v[41:42], 24, v[21:22] -; VI-NEXT: v_lshrrev_b32_e32 v42, 8, v20 -; VI-NEXT: v_lshrrev_b32_e32 v38, 8, v19 -; VI-NEXT: v_lshrrev_b32_e32 v62, 8, v18 -; VI-NEXT: v_lshrrev_b32_e32 v55, 8, v17 -; VI-NEXT: v_bfe_u32 v35, v58, 8, 8 -; VI-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; VI-NEXT: v_bfe_u32 v39, v61, 8, 8 -; VI-NEXT: v_bfe_u32 v58, v48, 8, 8 -; VI-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; VI-NEXT: s_waitcnt vmcnt(3) -; VI-NEXT: v_bfe_u32 v61, v53, 8, 8 -; VI-NEXT: .LBB49_4: ; %end -; VI-NEXT: s_or_b64 exec, exec, s[4:5] -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v43 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v2, v57, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 4, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v40 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v2, v56, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 12, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 16, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v2, v47, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 20, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 24, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v2, v34, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 28, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v2, v59, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 36, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v2, v63, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 44, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 48, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v2, v33, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 52, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(3) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v1, v32, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 56, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v2, v60, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 60, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v55 -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v45 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 64, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v62 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v31 -; VI-NEXT: v_or_b32_sdwa v2, v52, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x44, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v38 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v44 -; VI-NEXT: v_or_b32_sdwa v2, v51, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x48, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v42 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v61 -; VI-NEXT: v_or_b32_sdwa v2, v53, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x4c, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:504 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v41 -; VI-NEXT: v_or_b32_sdwa v2, v49, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x50, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:500 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v58 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x54, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:488 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v54 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x58, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:480 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v39 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x5c, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:472 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x60, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:468 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v35 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x64, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x68, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x6c, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x70, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v2, v46, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x74, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v2, v50, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: v_add_u32_e32 v2, vcc, 0x78, v0 -; VI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload -; VI-NEXT: v_add_u32_e32 v0, vcc, 0x7c, v0 -; VI-NEXT: s_waitcnt vmcnt(2) -; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; VI-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; VI-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; VI-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: bitcast_v64i16_to_v128i8: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 -; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 -; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr44 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr53 -; GFX9-NEXT: ; implicit-def: $vgpr58 -; GFX9-NEXT: ; implicit-def: $vgpr36 -; GFX9-NEXT: ; implicit-def: $vgpr57 -; GFX9-NEXT: ; implicit-def: $vgpr35 -; GFX9-NEXT: ; implicit-def: $vgpr34 -; GFX9-NEXT: ; implicit-def: $vgpr63 -; GFX9-NEXT: ; implicit-def: $vgpr61 -; GFX9-NEXT: ; implicit-def: $vgpr39 -; GFX9-NEXT: ; implicit-def: $vgpr38 -; GFX9-NEXT: ; implicit-def: $vgpr60 -; GFX9-NEXT: ; implicit-def: $vgpr59 -; GFX9-NEXT: ; implicit-def: $vgpr37 -; GFX9-NEXT: ; implicit-def: $vgpr47 -; GFX9-NEXT: ; implicit-def: $vgpr49 -; GFX9-NEXT: ; implicit-def: $vgpr46 -; GFX9-NEXT: ; implicit-def: $vgpr45 -; GFX9-NEXT: ; implicit-def: $vgpr48 -; GFX9-NEXT: ; implicit-def: $vgpr62 -; GFX9-NEXT: ; kill: killed $vgpr50 -; GFX9-NEXT: ; implicit-def: $vgpr56 -; GFX9-NEXT: ; implicit-def: $vgpr42 -; GFX9-NEXT: ; implicit-def: $vgpr41 -; GFX9-NEXT: ; implicit-def: $vgpr40 -; GFX9-NEXT: ; implicit-def: $vgpr52 -; GFX9-NEXT: ; implicit-def: $vgpr51 -; GFX9-NEXT: ; implicit-def: $vgpr50 -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-NEXT: ; implicit-def: $vgpr53 -; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-NEXT: ; implicit-def: $vgpr54 -; GFX9-NEXT: ; implicit-def: $vgpr53 -; GFX9-NEXT: ; implicit-def: $vgpr43 -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GFX9-NEXT: ; implicit-def: $vgpr43 -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GFX9-NEXT: ; implicit-def: $vgpr43 -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GFX9-NEXT: ; implicit-def: $vgpr43 -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GFX9-NEXT: ; implicit-def: $vgpr43 -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-NEXT: ; implicit-def: $vgpr43 -; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GFX9-NEXT: s_waitcnt vmcnt(18) -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: ; kill: killed $vgpr33 -; GFX9-NEXT: ; implicit-def: $vgpr33 -; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GFX9-NEXT: s_cbranch_execz .LBB49_2 -; GFX9-NEXT: ; %bb.1: ; %cmp.false -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v16 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v16 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v16 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v15 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v15 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v14 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v14 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v14 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v13 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v13 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v12 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v12 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v12 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v11 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v11 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v10 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v10 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v9 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v8 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v8 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v7 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v6 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v6 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v5 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v4 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v4 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v3 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v2 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill -; GFX9-NEXT: s_waitcnt vmcnt(45) -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v32 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v32 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-NEXT: s_waitcnt vmcnt(46) -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v31 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v31 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v30 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v30 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v30 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v29 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v29 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v28 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v28 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v28 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v27 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v27 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v26 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v26 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v26 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v25 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v25 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v24 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v24 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v24 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v23 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v23 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v22 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v22 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v22 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v21 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v21 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v20 -; GFX9-NEXT: v_lshrrev_b64 v[50:51], 24, v[15:16] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v20 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[50:51], 24, v[13:14] -; GFX9-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[50:51], 24, v[11:12] -; GFX9-NEXT: v_lshrrev_b64 v[51:52], 24, v[9:10] -; GFX9-NEXT: v_lshrrev_b64 v[52:53], 24, v[7:8] -; GFX9-NEXT: v_lshrrev_b64 v[53:54], 24, v[31:32] -; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[53:54], 24, v[29:30] -; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[53:54], 24, v[27:28] -; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[53:54], 24, v[25:26] -; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[53:54], 24, v[23:24] -; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[53:54], 24, v[21:22] -; GFX9-NEXT: v_lshrrev_b64 v[40:41], 24, v[5:6] -; GFX9-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[41:42], 24, v[3:4] -; GFX9-NEXT: v_lshrrev_b64 v[53:54], 24, v[19:20] -; GFX9-NEXT: v_lshrrev_b32_e32 v59, 8, v10 -; GFX9-NEXT: v_lshrrev_b32_e32 v60, 8, v9 -; GFX9-NEXT: v_lshrrev_b32_e32 v38, 8, v8 -; GFX9-NEXT: v_lshrrev_b32_e32 v39, 8, v7 -; GFX9-NEXT: v_lshrrev_b32_e32 v61, 8, v6 -; GFX9-NEXT: v_lshrrev_b32_e32 v63, 8, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v34, 8, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v35, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v57, 16, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v36, 8, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v58, 16, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v44, 8, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v56, 24, v32 -; GFX9-NEXT: v_lshrrev_b32_e32 v62, 8, v20 -; GFX9-NEXT: v_lshrrev_b32_e32 v48, 16, v19 -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v19 -; GFX9-NEXT: v_lshrrev_b32_e32 v45, 24, v18 -; GFX9-NEXT: v_lshrrev_b32_e32 v46, 16, v18 -; GFX9-NEXT: v_lshrrev_b32_e32 v49, 8, v18 -; GFX9-NEXT: v_lshrrev_b32_e32 v47, 16, v17 -; GFX9-NEXT: v_lshrrev_b32_e32 v37, 8, v17 -; GFX9-NEXT: v_lshrrev_b64 v[42:43], 24, v[1:2] -; GFX9-NEXT: v_lshrrev_b64 v[54:55], 24, v[17:18] -; GFX9-NEXT: .LBB49_2: ; %Flow -; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GFX9-NEXT: s_cbranch_execz .LBB49_4 -; GFX9-NEXT: ; %bb.3: ; %cmp.true -; GFX9-NEXT: v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_pk_add_u16 v15, v15, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_lshrrev_b64 v[33:34], 24, v[15:16] -; GFX9-NEXT: v_pk_add_u16 v14, v14, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_pk_add_u16 v13, v13, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[33:34], 24, v[13:14] -; GFX9-NEXT: s_waitcnt vmcnt(19) -; GFX9-NEXT: v_pk_add_u16 v32, v32, 3 op_sel_hi:[1,0] -; GFX9-NEXT: s_waitcnt vmcnt(18) -; GFX9-NEXT: v_pk_add_u16 v31, v31, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[33:34], 24, v[31:32] -; GFX9-NEXT: v_pk_add_u16 v30, v30, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_pk_add_u16 v29, v29, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[33:34], 24, v[29:30] -; GFX9-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[33:34], 24, v[27:28] -; GFX9-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[33:34], 24, v[25:26] -; GFX9-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[33:34], 24, v[23:24] -; GFX9-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b64 v[33:34], 24, v[21:22] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v16 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v16 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v16 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v15 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v15 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v14 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v14 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v14 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v13 -; GFX9-NEXT: v_pk_add_u16 v12, v12, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v13 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v12 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v12 -; GFX9-NEXT: v_pk_add_u16 v11, v11, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v12 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v11 -; GFX9-NEXT: v_pk_add_u16 v10, v10, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v11 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v10 -; GFX9-NEXT: v_pk_add_u16 v9, v9, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v10 -; GFX9-NEXT: v_pk_add_u16 v8, v8, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v9 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v8 -; GFX9-NEXT: v_pk_add_u16 v7, v7, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v8 -; GFX9-NEXT: v_pk_add_u16 v6, v6, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v7 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v6 -; GFX9-NEXT: v_pk_add_u16 v5, v5, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v6 -; GFX9-NEXT: v_pk_add_u16 v4, v4, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v5 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v4 -; GFX9-NEXT: v_pk_add_u16 v3, v3, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v4 -; GFX9-NEXT: v_pk_add_u16 v2, v2, 3 op_sel_hi:[1,0] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v3 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v2 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v32 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v32 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v31 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v31 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v30 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v30 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v30 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v29 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v29 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v28 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v28 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v28 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v27 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v27 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v26 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v26 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v26 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v25 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v25 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v24 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v24 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v24 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v23 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v23 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v22 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v22 -; GFX9-NEXT: v_lshrrev_b64 v[50:51], 24, v[11:12] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v22 -; GFX9-NEXT: v_lshrrev_b64 v[51:52], 24, v[9:10] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v21 -; GFX9-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_lshrrev_b64 v[52:53], 24, v[7:8] -; GFX9-NEXT: v_lshrrev_b64 v[40:41], 24, v[5:6] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v21 -; GFX9-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_pk_add_u16 v1, v1, 3 op_sel_hi:[1,0] -; GFX9-NEXT: v_lshrrev_b64 v[41:42], 24, v[3:4] -; GFX9-NEXT: v_lshrrev_b64 v[53:54], 24, v[19:20] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 24, v20 -; GFX9-NEXT: v_lshrrev_b64 v[42:43], 24, v[1:2] -; GFX9-NEXT: v_lshrrev_b64 v[54:55], 24, v[17:18] -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 16, v20 -; GFX9-NEXT: v_lshrrev_b32_e32 v59, 8, v10 -; GFX9-NEXT: v_lshrrev_b32_e32 v60, 8, v9 -; GFX9-NEXT: v_lshrrev_b32_e32 v38, 8, v8 -; GFX9-NEXT: v_lshrrev_b32_e32 v39, 8, v7 -; GFX9-NEXT: v_lshrrev_b32_e32 v61, 8, v6 -; GFX9-NEXT: v_lshrrev_b32_e32 v63, 8, v5 -; GFX9-NEXT: v_lshrrev_b32_e32 v34, 8, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v35, 8, v3 -; GFX9-NEXT: v_lshrrev_b32_e32 v57, 16, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v36, 8, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v58, 16, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v44, 8, v1 -; GFX9-NEXT: v_lshrrev_b32_e32 v56, 24, v32 -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill -; GFX9-NEXT: v_lshrrev_b32_e32 v62, 8, v20 -; GFX9-NEXT: v_lshrrev_b32_e32 v48, 16, v19 -; GFX9-NEXT: v_lshrrev_b32_e32 v33, 8, v19 -; GFX9-NEXT: v_lshrrev_b32_e32 v45, 24, v18 -; GFX9-NEXT: v_lshrrev_b32_e32 v46, 16, v18 -; GFX9-NEXT: v_lshrrev_b32_e32 v49, 8, v18 -; GFX9-NEXT: v_lshrrev_b32_e32 v47, 16, v17 -; GFX9-NEXT: v_lshrrev_b32_e32 v37, 8, v17 -; GFX9-NEXT: .LBB49_4: ; %end -; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v34 -; GFX9-NEXT: v_or_b32_sdwa v4, v4, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v63 -; GFX9-NEXT: v_or_b32_sdwa v5, v5, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v61 -; GFX9-NEXT: v_or_b32_sdwa v6, v6, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v39 -; GFX9-NEXT: v_or_b32_sdwa v7, v7, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v38 -; GFX9-NEXT: v_or_b32_sdwa v8, v8, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v60 -; GFX9-NEXT: v_or_b32_sdwa v9, v9, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v59 -; GFX9-NEXT: v_or_b32_sdwa v10, v10, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload -; GFX9-NEXT: v_lshlrev_b16_e32 v55, 8, v44 -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v55 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b16_e32 v36, 8, v36 -; GFX9-NEXT: v_or_b32_sdwa v2, v2, v36 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b16_e32 v35, 8, v35 -; GFX9-NEXT: v_or_b32_sdwa v3, v3, v35 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v34 -; GFX9-NEXT: v_or_b32_sdwa v11, v11, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v34 -; GFX9-NEXT: v_or_b32_sdwa v12, v12, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v34 -; GFX9-NEXT: v_or_b32_sdwa v13, v13, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v34 -; GFX9-NEXT: v_or_b32_sdwa v14, v14, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v34 -; GFX9-NEXT: v_or_b32_sdwa v15, v15, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v34 -; GFX9-NEXT: v_or_b32_sdwa v16, v16, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v37 -; GFX9-NEXT: v_or_b32_sdwa v17, v17, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v49 -; GFX9-NEXT: v_or_b32_sdwa v18, v18, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_lshlrev_b16_e32 v34, 8, v42 -; GFX9-NEXT: v_or_b32_sdwa v34, v58, v34 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v34 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: v_or_b32_sdwa v1, v57, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v41 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:8 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:12 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v40 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:20 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v52 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:24 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:28 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v51 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:32 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:36 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v50 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v11, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:40 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v12, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:44 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v13, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:48 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v14, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:52 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v15, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:56 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v16, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:60 -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v54 -; GFX9-NEXT: v_or_b32_sdwa v1, v47, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v17, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:64 -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v45 -; GFX9-NEXT: v_or_b32_sdwa v1, v46, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v18, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:68 -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v53 -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v33 -; GFX9-NEXT: v_or_b32_sdwa v1, v48, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v2, v19, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:72 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v62 -; GFX9-NEXT: v_or_b32_sdwa v1, v20, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:76 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(3) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX9-NEXT: v_or_b32_sdwa v1, v21, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:80 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: v_or_b32_sdwa v1, v22, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:84 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(3) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX9-NEXT: v_or_b32_sdwa v1, v23, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:88 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: v_or_b32_sdwa v1, v24, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:92 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(3) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX9-NEXT: v_or_b32_sdwa v1, v25, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:96 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: v_or_b32_sdwa v1, v26, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:100 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(3) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX9-NEXT: v_or_b32_sdwa v1, v27, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:104 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: v_or_b32_sdwa v1, v28, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:108 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(3) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX9-NEXT: v_or_b32_sdwa v1, v29, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:112 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: v_or_b32_sdwa v1, v30, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:116 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload -; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(3) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX9-NEXT: v_or_b32_sdwa v1, v31, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:120 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; GFX9-NEXT: v_lshlrev_b16_e32 v2, 8, v56 -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX9-NEXT: v_or_b32_sdwa v1, v32, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:124 -; GFX9-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: bitcast_v64i16_to_v128i8: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_clause 0x13 -; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:88 -; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:84 -; GFX11-NEXT: scratch_store_b32 off, v42, s32 offset:80 -; GFX11-NEXT: scratch_store_b32 off, v43, s32 offset:76 -; GFX11-NEXT: scratch_store_b32 off, v44, s32 offset:72 -; GFX11-NEXT: scratch_store_b32 off, v45, s32 offset:68 -; GFX11-NEXT: scratch_store_b32 off, v46, s32 offset:64 -; GFX11-NEXT: scratch_store_b32 off, v47, s32 offset:60 -; GFX11-NEXT: scratch_store_b32 off, v56, s32 offset:56 -; GFX11-NEXT: scratch_store_b32 off, v57, s32 offset:52 -; GFX11-NEXT: scratch_store_b32 off, v58, s32 offset:48 -; GFX11-NEXT: scratch_store_b32 off, v59, s32 offset:44 -; GFX11-NEXT: scratch_store_b32 off, v60, s32 offset:40 -; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:36 -; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:32 -; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:28 -; GFX11-NEXT: scratch_store_b32 off, v72, s32 offset:24 -; GFX11-NEXT: scratch_store_b32 off, v73, s32 offset:20 -; GFX11-NEXT: scratch_store_b32 off, v74, s32 offset:16 -; GFX11-NEXT: scratch_store_b32 off, v75, s32 offset:12 -; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v31, off, s32 -; GFX11-NEXT: ; implicit-def: $vgpr74 -; GFX11-NEXT: ; implicit-def: $vgpr72 -; GFX11-NEXT: ; implicit-def: $vgpr64 -; GFX11-NEXT: ; implicit-def: $vgpr63 -; GFX11-NEXT: ; implicit-def: $vgpr62 -; GFX11-NEXT: ; implicit-def: $vgpr60 -; GFX11-NEXT: ; implicit-def: $vgpr57 -; GFX11-NEXT: ; implicit-def: $vgpr47 -; GFX11-NEXT: ; implicit-def: $vgpr54 -; GFX11-NEXT: ; implicit-def: $vgpr45 -; GFX11-NEXT: ; implicit-def: $vgpr43 -; GFX11-NEXT: ; implicit-def: $vgpr42 -; GFX11-NEXT: ; implicit-def: $vgpr183 -; GFX11-NEXT: ; implicit-def: $vgpr181 -; GFX11-NEXT: ; implicit-def: $vgpr53 -; GFX11-NEXT: ; implicit-def: $vgpr179 -; GFX11-NEXT: ; implicit-def: $vgpr177 -; GFX11-NEXT: ; implicit-def: $vgpr167 -; GFX11-NEXT: ; implicit-def: $vgpr165 -; GFX11-NEXT: ; implicit-def: $vgpr164 -; GFX11-NEXT: ; implicit-def: $vgpr52 -; GFX11-NEXT: ; implicit-def: $vgpr161 -; GFX11-NEXT: ; implicit-def: $vgpr151 -; GFX11-NEXT: ; implicit-def: $vgpr150 -; GFX11-NEXT: ; implicit-def: $vgpr147 -; GFX11-NEXT: ; implicit-def: $vgpr145 -; GFX11-NEXT: ; implicit-def: $vgpr51 -; GFX11-NEXT: ; implicit-def: $vgpr144 -; GFX11-NEXT: ; implicit-def: $vgpr133 -; GFX11-NEXT: ; implicit-def: $vgpr131 -; GFX11-NEXT: ; implicit-def: $vgpr129 -; GFX11-NEXT: ; implicit-def: $vgpr119 -; GFX11-NEXT: ; implicit-def: $vgpr50 -; GFX11-NEXT: ; implicit-def: $vgpr117 -; GFX11-NEXT: ; implicit-def: $vgpr116 -; GFX11-NEXT: ; implicit-def: $vgpr115 -; GFX11-NEXT: ; implicit-def: $vgpr102 -; GFX11-NEXT: ; implicit-def: $vgpr100 -; GFX11-NEXT: ; implicit-def: $vgpr49 -; GFX11-NEXT: ; implicit-def: $vgpr98 -; GFX11-NEXT: ; implicit-def: $vgpr97 -; GFX11-NEXT: ; implicit-def: $vgpr96 -; GFX11-NEXT: ; implicit-def: $vgpr86 -; GFX11-NEXT: ; implicit-def: $vgpr82 -; GFX11-NEXT: ; implicit-def: $vgpr48 -; GFX11-NEXT: ; implicit-def: $vgpr80 -; GFX11-NEXT: ; implicit-def: $vgpr70 -; GFX11-NEXT: ; implicit-def: $vgpr68 -; GFX11-NEXT: ; implicit-def: $vgpr75 -; GFX11-NEXT: ; implicit-def: $vgpr73 -; GFX11-NEXT: ; implicit-def: $vgpr66 -; GFX11-NEXT: ; implicit-def: $vgpr61 -; GFX11-NEXT: ; implicit-def: $vgpr59 -; GFX11-NEXT: ; implicit-def: $vgpr58 -; GFX11-NEXT: ; implicit-def: $vgpr56 -; GFX11-NEXT: ; implicit-def: $vgpr46 -; GFX11-NEXT: ; implicit-def: $vgpr65 -; GFX11-NEXT: ; implicit-def: $vgpr44 -; GFX11-NEXT: ; implicit-def: $vgpr41 -; GFX11-NEXT: ; implicit-def: $vgpr40 -; GFX11-NEXT: ; implicit-def: $vgpr182 -; GFX11-NEXT: ; implicit-def: $vgpr180 -; GFX11-NEXT: ; implicit-def: $vgpr38 -; GFX11-NEXT: ; implicit-def: $vgpr178 -; GFX11-NEXT: ; implicit-def: $vgpr176 -; GFX11-NEXT: ; implicit-def: $vgpr166 -; GFX11-NEXT: ; implicit-def: $vgpr163 -; GFX11-NEXT: ; implicit-def: $vgpr162 -; GFX11-NEXT: ; implicit-def: $vgpr37 -; GFX11-NEXT: ; implicit-def: $vgpr160 -; GFX11-NEXT: ; implicit-def: $vgpr149 -; GFX11-NEXT: ; implicit-def: $vgpr148 -; GFX11-NEXT: ; implicit-def: $vgpr146 -; GFX11-NEXT: ; implicit-def: $vgpr135 -; GFX11-NEXT: ; implicit-def: $vgpr36 -; GFX11-NEXT: ; implicit-def: $vgpr134 -; GFX11-NEXT: ; implicit-def: $vgpr132 -; GFX11-NEXT: ; implicit-def: $vgpr130 -; GFX11-NEXT: ; implicit-def: $vgpr128 -; GFX11-NEXT: ; implicit-def: $vgpr118 -; GFX11-NEXT: ; implicit-def: $vgpr35 -; GFX11-NEXT: ; implicit-def: $vgpr114 -; GFX11-NEXT: ; implicit-def: $vgpr113 -; GFX11-NEXT: ; implicit-def: $vgpr112 -; GFX11-NEXT: ; implicit-def: $vgpr103 -; GFX11-NEXT: ; implicit-def: $vgpr101 -; GFX11-NEXT: ; implicit-def: $vgpr99 -; GFX11-NEXT: ; implicit-def: $vgpr87 -; GFX11-NEXT: ; implicit-def: $vgpr85 -; GFX11-NEXT: ; implicit-def: $vgpr84 -; GFX11-NEXT: ; implicit-def: $vgpr83 -; GFX11-NEXT: ; implicit-def: $vgpr81 -; GFX11-NEXT: ; implicit-def: $vgpr71 -; GFX11-NEXT: ; implicit-def: $vgpr69 -; GFX11-NEXT: ; implicit-def: $vgpr34 -; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v33 -; GFX11-NEXT: ; implicit-def: $vgpr33 -; GFX11-NEXT: s_and_saveexec_b32 s0, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 -; GFX11-NEXT: s_cbranch_execz .LBB49_2 -; GFX11-NEXT: ; %bb.1: ; %cmp.false -; GFX11-NEXT: v_lshrrev_b64 v[48:49], 24, v[15:16] -; GFX11-NEXT: v_lshrrev_b64 v[49:50], 24, v[13:14] -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b64 v[33:34], 24, v[31:32] -; GFX11-NEXT: v_lshrrev_b64 v[50:51], 24, v[11:12] -; GFX11-NEXT: v_lshrrev_b64 v[34:35], 24, v[29:30] -; GFX11-NEXT: v_lshrrev_b64 v[51:52], 24, v[9:10] -; GFX11-NEXT: v_lshrrev_b64 v[35:36], 24, v[27:28] -; GFX11-NEXT: v_lshrrev_b64 v[52:53], 24, v[7:8] -; GFX11-NEXT: v_lshrrev_b64 v[64:65], 24, v[1:2] -; GFX11-NEXT: v_lshrrev_b64 v[36:37], 24, v[25:26] -; GFX11-NEXT: v_lshrrev_b64 v[53:54], 24, v[5:6] -; GFX11-NEXT: v_lshrrev_b64 v[37:38], 24, v[23:24] -; GFX11-NEXT: v_lshrrev_b64 v[65:66], 24, v[19:20] -; GFX11-NEXT: v_lshrrev_b32_e32 v68, 24, v16 -; GFX11-NEXT: v_lshrrev_b32_e32 v70, 16, v16 -; GFX11-NEXT: v_lshrrev_b32_e32 v80, 8, v16 -; GFX11-NEXT: v_lshrrev_b32_e32 v82, 16, v15 -; GFX11-NEXT: v_lshrrev_b32_e32 v86, 8, v15 -; GFX11-NEXT: v_lshrrev_b32_e32 v96, 24, v14 -; GFX11-NEXT: v_lshrrev_b32_e32 v97, 16, v14 -; GFX11-NEXT: v_lshrrev_b32_e32 v98, 8, v14 -; GFX11-NEXT: v_lshrrev_b32_e32 v100, 16, v13 -; GFX11-NEXT: v_lshrrev_b32_e32 v102, 8, v13 -; GFX11-NEXT: v_lshrrev_b32_e32 v115, 24, v12 -; GFX11-NEXT: v_lshrrev_b32_e32 v116, 16, v12 -; GFX11-NEXT: v_lshrrev_b32_e32 v117, 8, v12 -; GFX11-NEXT: v_lshrrev_b32_e32 v119, 16, v11 -; GFX11-NEXT: v_lshrrev_b32_e32 v129, 8, v11 -; GFX11-NEXT: v_lshrrev_b32_e32 v131, 24, v10 -; GFX11-NEXT: v_lshrrev_b32_e32 v133, 16, v10 -; GFX11-NEXT: v_lshrrev_b32_e32 v144, 8, v10 -; GFX11-NEXT: v_lshrrev_b32_e32 v145, 16, v9 -; GFX11-NEXT: v_lshrrev_b32_e32 v147, 8, v9 -; GFX11-NEXT: v_lshrrev_b32_e32 v150, 24, v8 -; GFX11-NEXT: v_lshrrev_b32_e32 v151, 16, v8 -; GFX11-NEXT: v_lshrrev_b32_e32 v161, 8, v8 -; GFX11-NEXT: v_lshrrev_b32_e32 v164, 16, v7 -; GFX11-NEXT: v_lshrrev_b32_e32 v165, 8, v7 -; GFX11-NEXT: v_lshrrev_b32_e32 v167, 24, v6 -; GFX11-NEXT: v_lshrrev_b32_e32 v177, 16, v6 -; GFX11-NEXT: v_lshrrev_b32_e32 v179, 8, v6 -; GFX11-NEXT: v_lshrrev_b32_e32 v181, 16, v5 -; GFX11-NEXT: v_lshrrev_b32_e32 v183, 8, v5 -; GFX11-NEXT: v_lshrrev_b32_e32 v42, 24, v4 -; GFX11-NEXT: v_lshrrev_b32_e32 v43, 16, v4 -; GFX11-NEXT: v_lshrrev_b32_e32 v45, 8, v4 -; GFX11-NEXT: v_lshrrev_b32_e32 v47, 16, v3 -; GFX11-NEXT: v_lshrrev_b32_e32 v57, 8, v3 -; GFX11-NEXT: v_lshrrev_b32_e32 v60, 24, v2 -; GFX11-NEXT: v_lshrrev_b32_e32 v62, 16, v2 -; GFX11-NEXT: v_lshrrev_b32_e32 v63, 8, v2 -; GFX11-NEXT: v_lshrrev_b32_e32 v72, 16, v1 -; GFX11-NEXT: v_lshrrev_b32_e32 v74, 8, v1 -; GFX11-NEXT: v_lshrrev_b32_e32 v69, 24, v32 -; GFX11-NEXT: v_lshrrev_b32_e32 v71, 16, v32 -; GFX11-NEXT: v_lshrrev_b32_e32 v81, 8, v32 -; GFX11-NEXT: v_lshrrev_b32_e32 v83, 16, v31 -; GFX11-NEXT: v_lshrrev_b32_e32 v84, 8, v31 -; GFX11-NEXT: v_lshrrev_b32_e32 v85, 24, v30 -; GFX11-NEXT: v_lshrrev_b32_e32 v87, 16, v30 -; GFX11-NEXT: v_lshrrev_b32_e32 v99, 8, v30 -; GFX11-NEXT: v_lshrrev_b32_e32 v101, 16, v29 -; GFX11-NEXT: v_lshrrev_b32_e32 v103, 8, v29 -; GFX11-NEXT: v_lshrrev_b32_e32 v112, 24, v28 -; GFX11-NEXT: v_lshrrev_b32_e32 v113, 16, v28 -; GFX11-NEXT: v_lshrrev_b32_e32 v114, 8, v28 -; GFX11-NEXT: v_lshrrev_b32_e32 v118, 16, v27 -; GFX11-NEXT: v_lshrrev_b32_e32 v128, 8, v27 -; GFX11-NEXT: v_lshrrev_b32_e32 v130, 24, v26 -; GFX11-NEXT: v_lshrrev_b32_e32 v132, 16, v26 -; GFX11-NEXT: v_lshrrev_b32_e32 v134, 8, v26 -; GFX11-NEXT: v_lshrrev_b32_e32 v135, 16, v25 -; GFX11-NEXT: v_lshrrev_b32_e32 v146, 8, v25 -; GFX11-NEXT: v_lshrrev_b32_e32 v148, 24, v24 -; GFX11-NEXT: v_lshrrev_b32_e32 v149, 16, v24 -; GFX11-NEXT: v_lshrrev_b32_e32 v160, 8, v24 -; GFX11-NEXT: v_lshrrev_b32_e32 v162, 16, v23 -; GFX11-NEXT: v_lshrrev_b32_e32 v163, 8, v23 -; GFX11-NEXT: v_lshrrev_b32_e32 v166, 24, v22 -; GFX11-NEXT: v_lshrrev_b32_e32 v176, 16, v22 -; GFX11-NEXT: v_lshrrev_b32_e32 v178, 8, v22 -; GFX11-NEXT: v_lshrrev_b32_e32 v180, 16, v21 -; GFX11-NEXT: v_lshrrev_b32_e32 v182, 8, v21 -; GFX11-NEXT: v_lshrrev_b32_e32 v40, 24, v20 -; GFX11-NEXT: v_lshrrev_b32_e32 v41, 16, v20 -; GFX11-NEXT: v_lshrrev_b32_e32 v44, 8, v20 -; GFX11-NEXT: v_lshrrev_b32_e32 v46, 16, v19 -; GFX11-NEXT: v_lshrrev_b32_e32 v56, 8, v19 -; GFX11-NEXT: v_lshrrev_b32_e32 v58, 24, v18 -; GFX11-NEXT: v_lshrrev_b32_e32 v59, 16, v18 -; GFX11-NEXT: v_lshrrev_b32_e32 v61, 8, v18 -; GFX11-NEXT: v_lshrrev_b32_e32 v73, 16, v17 -; GFX11-NEXT: v_lshrrev_b32_e32 v75, 8, v17 -; GFX11-NEXT: v_lshrrev_b64 v[54:55], 24, v[3:4] -; GFX11-NEXT: v_lshrrev_b64 v[38:39], 24, v[21:22] -; GFX11-NEXT: v_lshrrev_b64 v[66:67], 24, v[17:18] -; GFX11-NEXT: .LBB49_2: ; %Flow -; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-NEXT: s_cbranch_execz .LBB49_4 -; GFX11-NEXT: ; %bb.3: ; %cmp.true -; GFX11-NEXT: v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v15, v15, 3 op_sel_hi:[1,0] -; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: v_pk_add_u16 v32, v32, 3 op_sel_hi:[1,0] -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_pk_add_u16 v31, v31, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v14, v14, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v13, v13, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v30, v30, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v29, v29, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v12, v12, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v11, v11, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v28, v28, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v27, v27, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v10, v10, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v9, v9, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v26, v26, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v25, v25, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v2, v2, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v1, v1, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v8, v8, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v7, v7, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v20, v20, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v19, v19, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v24, v24, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v23, v23, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v6, v6, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v5, v5, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_lshrrev_b64 v[48:49], 24, v[15:16] -; GFX11-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v22, v22, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v21, v21, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v4, v4, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_pk_add_u16 v3, v3, 3 op_sel_hi:[1,0] -; GFX11-NEXT: v_lshrrev_b64 v[49:50], 24, v[13:14] -; GFX11-NEXT: v_lshrrev_b64 v[33:34], 24, v[31:32] -; GFX11-NEXT: v_lshrrev_b64 v[50:51], 24, v[11:12] -; GFX11-NEXT: v_lshrrev_b64 v[34:35], 24, v[29:30] -; GFX11-NEXT: v_lshrrev_b64 v[51:52], 24, v[9:10] -; GFX11-NEXT: v_lshrrev_b64 v[35:36], 24, v[27:28] -; GFX11-NEXT: v_lshrrev_b64 v[52:53], 24, v[7:8] -; GFX11-NEXT: v_lshrrev_b64 v[64:65], 24, v[1:2] -; GFX11-NEXT: v_lshrrev_b64 v[36:37], 24, v[25:26] -; GFX11-NEXT: v_lshrrev_b64 v[53:54], 24, v[5:6] -; GFX11-NEXT: v_lshrrev_b64 v[37:38], 24, v[23:24] -; GFX11-NEXT: v_lshrrev_b64 v[65:66], 24, v[19:20] -; GFX11-NEXT: v_lshrrev_b64 v[54:55], 24, v[3:4] -; GFX11-NEXT: v_lshrrev_b64 v[38:39], 24, v[21:22] -; GFX11-NEXT: v_lshrrev_b64 v[66:67], 24, v[17:18] -; GFX11-NEXT: v_lshrrev_b32_e32 v68, 24, v16 -; GFX11-NEXT: v_lshrrev_b32_e32 v70, 16, v16 -; GFX11-NEXT: v_lshrrev_b32_e32 v80, 8, v16 -; GFX11-NEXT: v_lshrrev_b32_e32 v82, 16, v15 -; GFX11-NEXT: v_lshrrev_b32_e32 v86, 8, v15 -; GFX11-NEXT: v_lshrrev_b32_e32 v96, 24, v14 -; GFX11-NEXT: v_lshrrev_b32_e32 v97, 16, v14 -; GFX11-NEXT: v_lshrrev_b32_e32 v98, 8, v14 -; GFX11-NEXT: v_lshrrev_b32_e32 v100, 16, v13 -; GFX11-NEXT: v_lshrrev_b32_e32 v102, 8, v13 -; GFX11-NEXT: v_lshrrev_b32_e32 v115, 24, v12 -; GFX11-NEXT: v_lshrrev_b32_e32 v116, 16, v12 -; GFX11-NEXT: v_lshrrev_b32_e32 v117, 8, v12 -; GFX11-NEXT: v_lshrrev_b32_e32 v119, 16, v11 -; GFX11-NEXT: v_lshrrev_b32_e32 v129, 8, v11 -; GFX11-NEXT: v_lshrrev_b32_e32 v131, 24, v10 -; GFX11-NEXT: v_lshrrev_b32_e32 v133, 16, v10 -; GFX11-NEXT: v_lshrrev_b32_e32 v144, 8, v10 -; GFX11-NEXT: v_lshrrev_b32_e32 v145, 16, v9 -; GFX11-NEXT: v_lshrrev_b32_e32 v147, 8, v9 -; GFX11-NEXT: v_lshrrev_b32_e32 v150, 24, v8 -; GFX11-NEXT: v_lshrrev_b32_e32 v151, 16, v8 -; GFX11-NEXT: v_lshrrev_b32_e32 v161, 8, v8 -; GFX11-NEXT: v_lshrrev_b32_e32 v164, 16, v7 -; GFX11-NEXT: v_lshrrev_b32_e32 v165, 8, v7 -; GFX11-NEXT: v_lshrrev_b32_e32 v167, 24, v6 -; GFX11-NEXT: v_lshrrev_b32_e32 v177, 16, v6 -; GFX11-NEXT: v_lshrrev_b32_e32 v179, 8, v6 -; GFX11-NEXT: v_lshrrev_b32_e32 v181, 16, v5 -; GFX11-NEXT: v_lshrrev_b32_e32 v183, 8, v5 -; GFX11-NEXT: v_lshrrev_b32_e32 v42, 24, v4 -; GFX11-NEXT: v_lshrrev_b32_e32 v43, 16, v4 -; GFX11-NEXT: v_lshrrev_b32_e32 v45, 8, v4 -; GFX11-NEXT: v_lshrrev_b32_e32 v47, 16, v3 -; GFX11-NEXT: v_lshrrev_b32_e32 v57, 8, v3 -; GFX11-NEXT: v_lshrrev_b32_e32 v60, 24, v2 -; GFX11-NEXT: v_lshrrev_b32_e32 v62, 16, v2 -; GFX11-NEXT: v_lshrrev_b32_e32 v63, 8, v2 -; GFX11-NEXT: v_lshrrev_b32_e32 v72, 16, v1 -; GFX11-NEXT: v_lshrrev_b32_e32 v74, 8, v1 -; GFX11-NEXT: v_lshrrev_b32_e32 v69, 24, v32 -; GFX11-NEXT: v_lshrrev_b32_e32 v71, 16, v32 -; GFX11-NEXT: v_lshrrev_b32_e32 v81, 8, v32 -; GFX11-NEXT: v_lshrrev_b32_e32 v83, 16, v31 -; GFX11-NEXT: v_lshrrev_b32_e32 v84, 8, v31 -; GFX11-NEXT: v_lshrrev_b32_e32 v85, 24, v30 -; GFX11-NEXT: v_lshrrev_b32_e32 v87, 16, v30 -; GFX11-NEXT: v_lshrrev_b32_e32 v99, 8, v30 -; GFX11-NEXT: v_lshrrev_b32_e32 v101, 16, v29 -; GFX11-NEXT: v_lshrrev_b32_e32 v103, 8, v29 -; GFX11-NEXT: v_lshrrev_b32_e32 v112, 24, v28 -; GFX11-NEXT: v_lshrrev_b32_e32 v113, 16, v28 -; GFX11-NEXT: v_lshrrev_b32_e32 v114, 8, v28 -; GFX11-NEXT: v_lshrrev_b32_e32 v118, 16, v27 -; GFX11-NEXT: v_lshrrev_b32_e32 v128, 8, v27 -; GFX11-NEXT: v_lshrrev_b32_e32 v130, 24, v26 -; GFX11-NEXT: v_lshrrev_b32_e32 v132, 16, v26 -; GFX11-NEXT: v_lshrrev_b32_e32 v134, 8, v26 -; GFX11-NEXT: v_lshrrev_b32_e32 v135, 16, v25 -; GFX11-NEXT: v_lshrrev_b32_e32 v146, 8, v25 -; GFX11-NEXT: v_lshrrev_b32_e32 v148, 24, v24 -; GFX11-NEXT: v_lshrrev_b32_e32 v149, 16, v24 -; GFX11-NEXT: v_lshrrev_b32_e32 v160, 8, v24 -; GFX11-NEXT: v_lshrrev_b32_e32 v162, 16, v23 -; GFX11-NEXT: v_lshrrev_b32_e32 v163, 8, v23 -; GFX11-NEXT: v_lshrrev_b32_e32 v166, 24, v22 -; GFX11-NEXT: v_lshrrev_b32_e32 v176, 16, v22 -; GFX11-NEXT: v_lshrrev_b32_e32 v178, 8, v22 -; GFX11-NEXT: v_lshrrev_b32_e32 v180, 16, v21 -; GFX11-NEXT: v_lshrrev_b32_e32 v182, 8, v21 -; GFX11-NEXT: v_lshrrev_b32_e32 v40, 24, v20 -; GFX11-NEXT: v_lshrrev_b32_e32 v41, 16, v20 -; GFX11-NEXT: v_lshrrev_b32_e32 v44, 8, v20 -; GFX11-NEXT: v_lshrrev_b32_e32 v46, 16, v19 -; GFX11-NEXT: v_lshrrev_b32_e32 v56, 8, v19 -; GFX11-NEXT: v_lshrrev_b32_e32 v58, 24, v18 -; GFX11-NEXT: v_lshrrev_b32_e32 v59, 16, v18 -; GFX11-NEXT: v_lshrrev_b32_e32 v61, 8, v18 -; GFX11-NEXT: v_lshrrev_b32_e32 v73, 16, v17 -; GFX11-NEXT: v_lshrrev_b32_e32 v75, 8, v17 -; GFX11-NEXT: .LBB49_4: ; %end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 -; GFX11-NEXT: v_lshlrev_b16 v55, 8, v74 -; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX11-NEXT: v_lshlrev_b16 v39, 8, v64 -; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GFX11-NEXT: v_lshlrev_b16 v64, 8, v60 -; GFX11-NEXT: v_lshlrev_b16 v54, 8, v54 -; GFX11-NEXT: v_or_b32_e32 v1, v1, v55 -; GFX11-NEXT: v_and_b32_e32 v55, 0xff, v72 -; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GFX11-NEXT: v_and_b32_e32 v67, 0xff, v47 -; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 -; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX11-NEXT: v_or_b32_e32 v39, v55, v39 -; GFX11-NEXT: v_lshlrev_b16 v55, 8, v63 -; GFX11-NEXT: v_or_b32_e32 v54, v67, v54 -; GFX11-NEXT: v_lshlrev_b16 v67, 8, v42 -; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 -; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v39 -; GFX11-NEXT: v_or_b32_e32 v2, v2, v55 -; GFX11-NEXT: v_and_b32_e32 v55, 0xff, v62 -; GFX11-NEXT: v_lshlrev_b16 v53, 8, v53 -; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX11-NEXT: v_or_b32_e32 v1, v1, v39 -; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX11-NEXT: v_or_b32_e32 v55, v55, v64 -; GFX11-NEXT: v_lshlrev_b16 v64, 8, v57 -; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX11-NEXT: v_lshlrev_b16 v52, 8, v52 -; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8 -; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v55 -; GFX11-NEXT: v_or_b32_e32 v3, v3, v64 -; GFX11-NEXT: v_lshlrev_b16 v55, 8, v45 -; GFX11-NEXT: v_and_b32_e32 v64, 0xff, v43 -; GFX11-NEXT: v_lshlrev_b16 v51, 8, v51 -; GFX11-NEXT: v_or_b32_e32 v2, v2, v39 -; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v3 -; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v54 -; GFX11-NEXT: v_or_b32_e32 v4, v4, v55 -; GFX11-NEXT: v_or_b32_e32 v54, v64, v67 -; GFX11-NEXT: v_lshlrev_b16 v55, 8, v183 -; GFX11-NEXT: v_and_b32_e32 v64, 0xff, v181 -; GFX11-NEXT: v_or_b32_e32 v3, v3, v39 -; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v54 -; GFX11-NEXT: v_or_b32_e32 v5, v5, v55 -; GFX11-NEXT: v_or_b32_e32 v53, v64, v53 -; GFX11-NEXT: v_lshlrev_b16 v54, 8, v179 -; GFX11-NEXT: v_and_b32_e32 v55, 0xff, v177 -; GFX11-NEXT: v_lshlrev_b16 v64, 8, v167 -; GFX11-NEXT: v_or_b32_e32 v4, v4, v39 -; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v53 -; GFX11-NEXT: v_or_b32_e32 v6, v6, v54 -; GFX11-NEXT: v_or_b32_e32 v53, v55, v64 -; GFX11-NEXT: v_lshlrev_b16 v54, 8, v165 -; GFX11-NEXT: v_and_b32_e32 v55, 0xff, v164 -; GFX11-NEXT: v_or_b32_e32 v5, v5, v39 -; GFX11-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v53 -; GFX11-NEXT: v_or_b32_e32 v7, v7, v54 -; GFX11-NEXT: v_or_b32_e32 v52, v55, v52 -; GFX11-NEXT: v_lshlrev_b16 v53, 8, v161 -; GFX11-NEXT: v_and_b32_e32 v54, 0xff, v151 -; GFX11-NEXT: v_lshlrev_b16 v55, 8, v150 -; GFX11-NEXT: v_or_b32_e32 v6, v6, v39 -; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v52 -; GFX11-NEXT: v_or_b32_e32 v8, v8, v53 -; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GFX11-NEXT: v_or_b32_e32 v52, v54, v55 -; GFX11-NEXT: v_and_b32_e32 v54, 0xff, v145 -; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v9 -; GFX11-NEXT: v_lshlrev_b16 v53, 8, v147 -; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 -; GFX11-NEXT: v_lshlrev_b16 v55, 8, v144 -; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8 -; GFX11-NEXT: v_lshlrev_b32_e32 v52, 16, v52 -; GFX11-NEXT: v_or_b32_e32 v51, v54, v51 -; GFX11-NEXT: v_or_b32_e32 v9, v9, v53 -; GFX11-NEXT: v_or_b32_e32 v10, v10, v55 -; GFX11-NEXT: v_or_b32_e32 v7, v7, v39 -; GFX11-NEXT: v_or_b32_e32 v8, v8, v52 -; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v51 -; GFX11-NEXT: v_and_b32_e32 v51, 0xff, v133 -; GFX11-NEXT: v_lshlrev_b16 v52, 8, v131 -; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v11 -; GFX11-NEXT: v_lshlrev_b16 v53, 8, v129 -; GFX11-NEXT: v_and_b32_e32 v54, 0xff, v119 -; GFX11-NEXT: v_lshlrev_b16 v50, 8, v50 -; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 -; GFX11-NEXT: v_lshlrev_b16 v55, 8, v117 -; GFX11-NEXT: v_and_b32_e32 v64, 0xff, v116 -; GFX11-NEXT: v_lshlrev_b16 v67, 8, v115 -; GFX11-NEXT: v_or_b32_e32 v51, v51, v52 -; GFX11-NEXT: v_or_b32_e32 v11, v11, v53 -; GFX11-NEXT: v_or_b32_e32 v50, v54, v50 -; GFX11-NEXT: v_or_b32_e32 v12, v12, v55 -; GFX11-NEXT: v_or_b32_e32 v52, v64, v67 -; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v9 -; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-NEXT: v_lshlrev_b32_e32 v51, 16, v51 -; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GFX11-NEXT: v_lshlrev_b32_e32 v50, 16, v50 -; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-NEXT: v_lshlrev_b32_e32 v52, 16, v52 -; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off -; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 -; GFX11-NEXT: v_or_b32_e32 v1, v9, v39 -; GFX11-NEXT: v_or_b32_e32 v2, v10, v51 -; GFX11-NEXT: v_or_b32_e32 v3, v11, v50 -; GFX11-NEXT: v_or_b32_e32 v4, v12, v52 -; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v13 -; GFX11-NEXT: v_lshlrev_b16 v6, 8, v102 -; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v100 -; GFX11-NEXT: v_lshlrev_b16 v8, 8, v49 -; GFX11-NEXT: v_and_b32_e32 v9, 0xff, v14 -; GFX11-NEXT: v_lshlrev_b16 v10, 8, v98 -; GFX11-NEXT: v_and_b32_e32 v11, 0xff, v97 -; GFX11-NEXT: v_lshlrev_b16 v12, 8, v96 -; GFX11-NEXT: v_and_b32_e32 v13, 0xff, v15 -; GFX11-NEXT: v_lshlrev_b16 v14, 8, v86 -; GFX11-NEXT: v_or_b32_e32 v5, v5, v6 -; GFX11-NEXT: v_or_b32_e32 v6, v7, v8 -; GFX11-NEXT: v_or_b32_e32 v7, v9, v10 -; GFX11-NEXT: v_or_b32_e32 v8, v11, v12 -; GFX11-NEXT: v_or_b32_e32 v9, v13, v14 -; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v82 -; GFX11-NEXT: v_lshlrev_b16 v11, 8, v48 -; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v16 -; GFX11-NEXT: v_lshlrev_b16 v13, 8, v80 -; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v70 -; GFX11-NEXT: v_lshlrev_b16 v15, 8, v68 -; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v17 -; GFX11-NEXT: v_lshlrev_b16 v17, 8, v75 -; GFX11-NEXT: v_and_b32_e32 v39, 0xff, v73 -; GFX11-NEXT: v_lshlrev_b16 v48, 8, v66 -; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-NEXT: v_or_b32_e32 v11, v12, v13 -; GFX11-NEXT: v_or_b32_e32 v12, v14, v15 -; GFX11-NEXT: v_or_b32_e32 v13, v16, v17 -; GFX11-NEXT: v_or_b32_e32 v14, v39, v48 -; GFX11-NEXT: v_and_b32_e32 v5, 0xffff, v5 -; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v7 -; GFX11-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v9 -; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v10 -; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11 -; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v12 -; GFX11-NEXT: v_and_b32_e32 v13, 0xffff, v13 -; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14 -; GFX11-NEXT: v_or_b32_e32 v5, v5, v6 -; GFX11-NEXT: v_or_b32_e32 v6, v7, v8 -; GFX11-NEXT: v_or_b32_e32 v7, v9, v10 -; GFX11-NEXT: v_or_b32_e32 v8, v11, v12 -; GFX11-NEXT: v_or_b32_e32 v9, v13, v14 -; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v18 -; GFX11-NEXT: v_lshlrev_b16 v11, 8, v61 -; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v59 -; GFX11-NEXT: v_lshlrev_b16 v13, 8, v58 -; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v19 -; GFX11-NEXT: v_lshlrev_b16 v15, 8, v56 -; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v46 -; GFX11-NEXT: v_lshlrev_b16 v17, 8, v65 -; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v20 -; GFX11-NEXT: v_lshlrev_b16 v19, 8, v44 -; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-NEXT: v_or_b32_e32 v11, v12, v13 -; GFX11-NEXT: v_or_b32_e32 v12, v14, v15 -; GFX11-NEXT: v_or_b32_e32 v13, v16, v17 -; GFX11-NEXT: v_or_b32_e32 v14, v18, v19 -; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v41 -; GFX11-NEXT: v_lshlrev_b16 v16, 8, v40 -; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v21 -; GFX11-NEXT: v_lshlrev_b16 v18, 8, v182 -; GFX11-NEXT: v_and_b32_e32 v19, 0xff, v180 -; GFX11-NEXT: v_lshlrev_b16 v20, 8, v38 -; GFX11-NEXT: v_and_b32_e32 v21, 0xff, v22 -; GFX11-NEXT: v_lshlrev_b16 v22, 8, v178 -; GFX11-NEXT: v_and_b32_e32 v38, 0xff, v176 -; GFX11-NEXT: v_lshlrev_b16 v39, 8, v166 -; GFX11-NEXT: v_or_b32_e32 v15, v15, v16 -; GFX11-NEXT: v_or_b32_e32 v16, v17, v18 -; GFX11-NEXT: v_or_b32_e32 v17, v19, v20 -; GFX11-NEXT: v_or_b32_e32 v18, v21, v22 -; GFX11-NEXT: v_or_b32_e32 v19, v38, v39 -; GFX11-NEXT: v_and_b32_e32 v10, 0xffff, v10 -; GFX11-NEXT: v_lshlrev_b32_e32 v11, 16, v11 -; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12 -; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v13 -; GFX11-NEXT: v_and_b32_e32 v14, 0xffff, v14 -; GFX11-NEXT: v_lshlrev_b32_e32 v15, 16, v15 -; GFX11-NEXT: v_and_b32_e32 v16, 0xffff, v16 -; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v17 -; GFX11-NEXT: v_and_b32_e32 v18, 0xffff, v18 -; GFX11-NEXT: v_lshlrev_b32_e32 v19, 16, v19 -; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 -; GFX11-NEXT: v_or_b32_e32 v11, v12, v13 -; GFX11-NEXT: v_or_b32_e32 v12, v14, v15 -; GFX11-NEXT: v_or_b32_e32 v13, v16, v17 -; GFX11-NEXT: v_or_b32_e32 v14, v18, v19 -; GFX11-NEXT: v_and_b32_e32 v15, 0xff, v23 -; GFX11-NEXT: v_lshlrev_b16 v16, 8, v163 -; GFX11-NEXT: v_and_b32_e32 v17, 0xff, v162 -; GFX11-NEXT: v_lshlrev_b16 v18, 8, v37 -; GFX11-NEXT: v_and_b32_e32 v19, 0xff, v24 -; GFX11-NEXT: v_lshlrev_b16 v20, 8, v160 -; GFX11-NEXT: v_and_b32_e32 v21, 0xff, v149 -; GFX11-NEXT: v_lshlrev_b16 v22, 8, v148 -; GFX11-NEXT: v_and_b32_e32 v23, 0xff, v25 -; GFX11-NEXT: v_lshlrev_b16 v24, 8, v146 -; GFX11-NEXT: v_or_b32_e32 v15, v15, v16 -; GFX11-NEXT: v_or_b32_e32 v16, v17, v18 -; GFX11-NEXT: v_or_b32_e32 v17, v19, v20 -; GFX11-NEXT: v_or_b32_e32 v18, v21, v22 -; GFX11-NEXT: v_or_b32_e32 v19, v23, v24 -; GFX11-NEXT: v_and_b32_e32 v20, 0xff, v135 -; GFX11-NEXT: v_lshlrev_b16 v21, 8, v36 -; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v26 -; GFX11-NEXT: v_lshlrev_b16 v23, 8, v134 -; GFX11-NEXT: v_and_b32_e32 v24, 0xff, v132 -; GFX11-NEXT: v_lshlrev_b16 v25, 8, v130 -; GFX11-NEXT: v_and_b32_e32 v26, 0xff, v27 -; GFX11-NEXT: v_lshlrev_b16 v27, 8, v128 -; GFX11-NEXT: v_and_b32_e32 v36, 0xff, v118 -; GFX11-NEXT: v_lshlrev_b16 v35, 8, v35 -; GFX11-NEXT: v_or_b32_e32 v20, v20, v21 -; GFX11-NEXT: v_or_b32_e32 v21, v22, v23 -; GFX11-NEXT: v_or_b32_e32 v22, v24, v25 -; GFX11-NEXT: v_or_b32_e32 v23, v26, v27 -; GFX11-NEXT: v_or_b32_e32 v24, v36, v35 -; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v15 -; GFX11-NEXT: v_lshlrev_b32_e32 v16, 16, v16 -; GFX11-NEXT: v_and_b32_e32 v17, 0xffff, v17 -; GFX11-NEXT: v_lshlrev_b32_e32 v18, 16, v18 -; GFX11-NEXT: v_and_b32_e32 v19, 0xffff, v19 -; GFX11-NEXT: v_lshlrev_b32_e32 v20, 16, v20 -; GFX11-NEXT: v_and_b32_e32 v21, 0xffff, v21 -; GFX11-NEXT: v_lshlrev_b32_e32 v22, 16, v22 -; GFX11-NEXT: v_and_b32_e32 v23, 0xffff, v23 -; GFX11-NEXT: v_lshlrev_b32_e32 v24, 16, v24 -; GFX11-NEXT: v_or_b32_e32 v15, v15, v16 -; GFX11-NEXT: v_or_b32_e32 v16, v17, v18 -; GFX11-NEXT: v_or_b32_e32 v17, v19, v20 -; GFX11-NEXT: v_or_b32_e32 v18, v21, v22 -; GFX11-NEXT: v_or_b32_e32 v19, v23, v24 -; GFX11-NEXT: v_and_b32_e32 v20, 0xff, v28 -; GFX11-NEXT: v_lshlrev_b16 v21, 8, v114 -; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v113 -; GFX11-NEXT: v_lshlrev_b16 v23, 8, v112 -; GFX11-NEXT: v_and_b32_e32 v24, 0xff, v29 -; GFX11-NEXT: v_lshlrev_b16 v25, 8, v103 -; GFX11-NEXT: v_and_b32_e32 v26, 0xff, v101 -; GFX11-NEXT: v_lshlrev_b16 v27, 8, v34 -; GFX11-NEXT: v_and_b32_e32 v28, 0xff, v30 -; GFX11-NEXT: v_lshlrev_b16 v29, 8, v99 -; GFX11-NEXT: v_or_b32_e32 v20, v20, v21 -; GFX11-NEXT: v_or_b32_e32 v21, v22, v23 -; GFX11-NEXT: v_or_b32_e32 v22, v24, v25 -; GFX11-NEXT: v_or_b32_e32 v23, v26, v27 -; GFX11-NEXT: v_or_b32_e32 v24, v28, v29 -; GFX11-NEXT: v_and_b32_e32 v25, 0xff, v87 -; GFX11-NEXT: v_lshlrev_b16 v26, 8, v85 -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_and_b32_e32 v27, 0xff, v31 -; GFX11-NEXT: v_lshlrev_b16 v28, 8, v84 -; GFX11-NEXT: v_and_b32_e32 v29, 0xff, v83 -; GFX11-NEXT: v_lshlrev_b16 v30, 8, v33 -; GFX11-NEXT: v_and_b32_e32 v31, 0xff, v32 -; GFX11-NEXT: v_lshlrev_b16 v32, 8, v81 -; GFX11-NEXT: v_and_b32_e32 v33, 0xff, v71 -; GFX11-NEXT: v_lshlrev_b16 v34, 8, v69 -; GFX11-NEXT: v_or_b32_e32 v25, v25, v26 -; GFX11-NEXT: v_or_b32_e32 v26, v27, v28 -; GFX11-NEXT: v_or_b32_e32 v27, v29, v30 -; GFX11-NEXT: v_or_b32_e32 v28, v31, v32 -; GFX11-NEXT: v_or_b32_e32 v29, v33, v34 -; GFX11-NEXT: v_and_b32_e32 v20, 0xffff, v20 -; GFX11-NEXT: v_lshlrev_b32_e32 v21, 16, v21 -; GFX11-NEXT: v_and_b32_e32 v22, 0xffff, v22 -; GFX11-NEXT: v_lshlrev_b32_e32 v23, 16, v23 -; GFX11-NEXT: v_and_b32_e32 v24, 0xffff, v24 -; GFX11-NEXT: v_lshlrev_b32_e32 v25, 16, v25 -; GFX11-NEXT: v_and_b32_e32 v26, 0xffff, v26 -; GFX11-NEXT: v_lshlrev_b32_e32 v27, 16, v27 -; GFX11-NEXT: v_and_b32_e32 v28, 0xffff, v28 -; GFX11-NEXT: v_lshlrev_b32_e32 v29, 16, v29 -; GFX11-NEXT: v_or_b32_e32 v20, v20, v21 -; GFX11-NEXT: v_or_b32_e32 v21, v22, v23 -; GFX11-NEXT: v_or_b32_e32 v22, v24, v25 -; GFX11-NEXT: v_or_b32_e32 v23, v26, v27 -; GFX11-NEXT: v_or_b32_e32 v24, v28, v29 -; GFX11-NEXT: s_clause 0x5 -; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:32 -; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:48 -; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:64 -; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:80 -; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:96 -; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:112 -; GFX11-NEXT: s_clause 0x13 -; GFX11-NEXT: scratch_load_b32 v75, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v74, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v73, off, s32 offset:20 -; GFX11-NEXT: scratch_load_b32 v72, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:40 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v47, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v46, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v45, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:88 -; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: s_setpc_b64 s[30:31] - %cmp = icmp eq i32 %b, 0 - br i1 %cmp, label %cmp.true, label %cmp.false - -cmp.true: - %a1 = add <64 x i16> %a, splat (i16 3) - %a2 = bitcast <64 x i16> %a1 to <128 x i8> - br label %end - -cmp.false: - %a3 = bitcast <64 x i16> %a to <128 x i8> - br label %end - -end: - %phi = phi <128 x i8> [ %a2, %cmp.true ], [ %a3, %cmp.false ] - ret <128 x i8> %phi -} - define <64 x half> @bitcast_v64bf16_to_v64f16(<64 x bfloat> %a, i32 %b) { ; GCN-LABEL: bitcast_v64bf16_to_v64f16: ; GCN: ; %bb.0: @@ -97989,7 +93041,7 @@ define <64 x half> @bitcast_v64bf16_to_v64f16(<64 x bfloat> %a, i32 %b) { ; GCN-NEXT: ; implicit-def: $vgpr3 ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB50_2 +; GCN-NEXT: s_cbranch_execz .LBB49_2 ; GCN-NEXT: ; %bb.1: ; %cmp.false ; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -98344,9 +93396,9 @@ define <64 x half> @bitcast_v64bf16_to_v64f16(<64 x bfloat> %a, i32 %b) { ; GCN-NEXT: ; implicit-def: $vgpr63 ; GCN-NEXT: ; implicit-def: $vgpr1 ; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: .LBB50_2: ; %Flow +; GCN-NEXT: .LBB49_2: ; %Flow ; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB50_4 +; GCN-NEXT: s_cbranch_execz .LBB49_4 ; GCN-NEXT: ; %bb.3: ; %cmp.true ; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 ; GCN-NEXT: v_and_b32_e32 v3, 0xffff0000, v1 @@ -98790,7 +93842,7 @@ define <64 x half> @bitcast_v64bf16_to_v64f16(<64 x bfloat> %a, i32 %b) { ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cvt_f32_f16_e32 v3, v1 -; GCN-NEXT: .LBB50_4: ; %end +; GCN-NEXT: .LBB49_4: ; %end ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: s_waitcnt expcnt(0) ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload @@ -99104,7 +94156,7 @@ define <64 x half> @bitcast_v64bf16_to_v64f16(<64 x bfloat> %a, i32 %b) { ; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; VI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; VI-NEXT: s_cbranch_execz .LBB50_2 +; VI-NEXT: s_cbranch_execz .LBB49_2 ; VI-NEXT: ; %bb.1: ; %cmp.true ; VI-NEXT: v_lshlrev_b32_e32 v32, 16, v16 ; VI-NEXT: v_add_f32_e32 v32, 0x40c00000, v32 @@ -99690,7 +94742,7 @@ define <64 x half> @bitcast_v64bf16_to_v64f16(<64 x bfloat> %a, i32 %b) { ; VI-NEXT: v_lshrrev_b32_e32 v16, 16, v16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_alignbit_b32 v16, v16, v32, 16 -; VI-NEXT: .LBB50_2: ; %end +; VI-NEXT: .LBB49_2: ; %end ; VI-NEXT: s_or_b64 exec, exec, s[4:5] ; VI-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload @@ -99737,7 +94789,7 @@ define <64 x half> @bitcast_v64bf16_to_v64f16(<64 x bfloat> %a, i32 %b) { ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GFX9-NEXT: s_cbranch_execz .LBB50_2 +; GFX9-NEXT: s_cbranch_execz .LBB49_2 ; GFX9-NEXT: ; %bb.1: ; %cmp.true ; GFX9-NEXT: v_lshlrev_b32_e32 v32, 16, v16 ; GFX9-NEXT: v_add_f32_e32 v32, 0x40c00000, v32 @@ -100227,7 +95279,7 @@ define <64 x half> @bitcast_v64bf16_to_v64f16(<64 x bfloat> %a, i32 %b) { ; GFX9-NEXT: v_perm_b32 v18, v18, v34, s6 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_perm_b32 v16, v32, v16, s6 -; GFX9-NEXT: .LBB50_2: ; %end +; GFX9-NEXT: .LBB49_2: ; %end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload @@ -100260,7 +95312,7 @@ define <64 x half> @bitcast_v64bf16_to_v64f16(<64 x bfloat> %a, i32 %b) { ; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-NEXT: s_cbranch_execz .LBB50_2 +; GFX11-NEXT: s_cbranch_execz .LBB49_2 ; GFX11-NEXT: ; %bb.1: ; %cmp.true ; GFX11-NEXT: v_lshlrev_b32_e32 v33, 16, v17 ; GFX11-NEXT: v_lshlrev_b32_e32 v32, 16, v16 @@ -100770,7 +95822,7 @@ define <64 x half> @bitcast_v64bf16_to_v64f16(<64 x bfloat> %a, i32 %b) { ; GFX11-NEXT: v_cndmask_b32_e32 v11, v85, v96, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_perm_b32 v11, v11, v83, 0x7060302 -; GFX11-NEXT: .LBB50_2: ; %end +; GFX11-NEXT: .LBB49_2: ; %end ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -101055,7 +96107,7 @@ define <64 x bfloat> @bitcast_v64f16_to_v64bf16(<64 x half> %a, i32 %b) { ; GCN-NEXT: ; kill: killed $vgpr2 ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB51_2 +; GCN-NEXT: s_cbranch_execz .LBB50_2 ; GCN-NEXT: ; %bb.1: ; %cmp.false ; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -101319,9 +96371,9 @@ define <64 x bfloat> @bitcast_v64f16_to_v64bf16(<64 x half> %a, i32 %b) { ; GCN-NEXT: ; implicit-def: $vgpr63 ; GCN-NEXT: ; implicit-def: $vgpr1 ; GCN-NEXT: ; implicit-def: $vgpr31 -; GCN-NEXT: .LBB51_2: ; %Flow +; GCN-NEXT: .LBB50_2: ; %Flow ; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB51_4 +; GCN-NEXT: s_cbranch_execz .LBB50_4 ; GCN-NEXT: ; %bb.3: ; %cmp.true ; GCN-NEXT: v_cvt_f32_f16_e32 v31, v31 ; GCN-NEXT: s_waitcnt expcnt(0) @@ -101727,7 +96779,7 @@ define <64 x bfloat> @bitcast_v64f16_to_v64bf16(<64 x half> %a, i32 %b) { ; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill ; GCN-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill -; GCN-NEXT: .LBB51_4: ; %end +; GCN-NEXT: .LBB50_4: ; %end ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: s_waitcnt expcnt(0) ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload @@ -102077,7 +97129,7 @@ define <64 x bfloat> @bitcast_v64f16_to_v64bf16(<64 x half> %a, i32 %b) { ; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; VI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; VI-NEXT: s_cbranch_execz .LBB51_2 +; VI-NEXT: s_cbranch_execz .LBB50_2 ; VI-NEXT: ; %bb.1: ; %cmp.true ; VI-NEXT: v_mov_b32_e32 v32, 0x200 ; VI-NEXT: v_add_f16_e32 v33, 0x200, v15 @@ -102177,7 +97229,7 @@ define <64 x bfloat> @bitcast_v64f16_to_v64bf16(<64 x half> %a, i32 %b) { ; VI-NEXT: v_add_f16_e32 v16, 0x200, v16 ; VI-NEXT: v_or_b32_e32 v17, v33, v17 ; VI-NEXT: v_or_b32_e32 v16, v16, v32 -; VI-NEXT: .LBB51_2: ; %end +; VI-NEXT: .LBB50_2: ; %end ; VI-NEXT: s_or_b64 exec, exec, s[4:5] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] @@ -102192,7 +97244,7 @@ define <64 x bfloat> @bitcast_v64f16_to_v64bf16(<64 x half> %a, i32 %b) { ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GFX9-NEXT: s_cbranch_execz .LBB51_2 +; GFX9-NEXT: s_cbranch_execz .LBB50_2 ; GFX9-NEXT: ; %bb.1: ; %cmp.true ; GFX9-NEXT: s_movk_i32 s6, 0x200 ; GFX9-NEXT: v_pk_add_f16 v15, v15, s6 op_sel_hi:[1,0] @@ -102228,7 +97280,7 @@ define <64 x bfloat> @bitcast_v64f16_to_v64bf16(<64 x half> %a, i32 %b) { ; GFX9-NEXT: v_pk_add_f16 v18, v18, s6 op_sel_hi:[1,0] ; GFX9-NEXT: v_pk_add_f16 v17, v17, s6 op_sel_hi:[1,0] ; GFX9-NEXT: v_pk_add_f16 v16, v16, s6 op_sel_hi:[1,0] -; GFX9-NEXT: .LBB51_2: ; %end +; GFX9-NEXT: .LBB50_2: ; %end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -102245,7 +97297,7 @@ define <64 x bfloat> @bitcast_v64f16_to_v64bf16(<64 x half> %a, i32 %b) { ; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-NEXT: s_cbranch_execz .LBB51_2 +; GFX11-NEXT: s_cbranch_execz .LBB50_2 ; GFX11-NEXT: ; %bb.1: ; %cmp.true ; GFX11-NEXT: v_pk_add_f16 v15, 0x200, v15 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_add_f16 v14, 0x200, v14 op_sel_hi:[0,1] @@ -102280,7 +97332,7 @@ define <64 x bfloat> @bitcast_v64f16_to_v64bf16(<64 x half> %a, i32 %b) { ; GFX11-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_add_f16 v17, 0x200, v17 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_add_f16 v16, 0x200, v16 op_sel_hi:[0,1] -; GFX11-NEXT: .LBB51_2: ; %end +; GFX11-NEXT: .LBB50_2: ; %end ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -102583,7 +97635,7 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) { ; GCN-NEXT: ; implicit-def: $vgpr26 ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB52_2 +; GCN-NEXT: s_cbranch_execz .LBB51_2 ; GCN-NEXT: ; %bb.1: ; %cmp.false ; GCN-NEXT: v_lshrrev_b32_e32 v26, 16, v36 ; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill @@ -102862,9 +97914,9 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) { ; GCN-NEXT: ; implicit-def: $vgpr14 ; GCN-NEXT: ; implicit-def: $vgpr12 ; GCN-NEXT: ; implicit-def: $vgpr11 -; GCN-NEXT: .LBB52_2: ; %Flow +; GCN-NEXT: .LBB51_2: ; %Flow ; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB52_4 +; GCN-NEXT: s_cbranch_execz .LBB51_4 ; GCN-NEXT: ; %bb.3: ; %cmp.true ; GCN-NEXT: v_and_b32_e32 v26, 0xffff0000, v36 ; GCN-NEXT: v_and_b32_e32 v27, 0xffff0000, v35 @@ -103214,7 +98266,7 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) { ; GCN-NEXT: s_waitcnt expcnt(0) ; GCN-NEXT: v_alignbit_b32 v1, v39, v11, 16 ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill -; GCN-NEXT: .LBB52_4: ; %end +; GCN-NEXT: .LBB51_4: ; %end ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: s_waitcnt expcnt(0) ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload @@ -103506,7 +98558,7 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) { ; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; VI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; VI-NEXT: s_cbranch_execz .LBB52_2 +; VI-NEXT: s_cbranch_execz .LBB51_2 ; VI-NEXT: ; %bb.1: ; %cmp.true ; VI-NEXT: v_lshlrev_b32_e32 v32, 16, v16 ; VI-NEXT: v_add_f32_e32 v32, 0x40c00000, v32 @@ -104092,7 +99144,7 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) { ; VI-NEXT: v_lshrrev_b32_e32 v16, 16, v16 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_alignbit_b32 v16, v16, v32, 16 -; VI-NEXT: .LBB52_2: ; %end +; VI-NEXT: .LBB51_2: ; %end ; VI-NEXT: s_or_b64 exec, exec, s[4:5] ; VI-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; VI-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload @@ -104139,7 +99191,7 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) { ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GFX9-NEXT: s_cbranch_execz .LBB52_2 +; GFX9-NEXT: s_cbranch_execz .LBB51_2 ; GFX9-NEXT: ; %bb.1: ; %cmp.true ; GFX9-NEXT: v_lshlrev_b32_e32 v32, 16, v16 ; GFX9-NEXT: v_add_f32_e32 v32, 0x40c00000, v32 @@ -104629,7 +99681,7 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) { ; GFX9-NEXT: v_perm_b32 v18, v18, v34, s6 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_perm_b32 v16, v32, v16, s6 -; GFX9-NEXT: .LBB52_2: ; %end +; GFX9-NEXT: .LBB51_2: ; %end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload @@ -104662,7 +99714,7 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) { ; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-NEXT: s_cbranch_execz .LBB52_2 +; GFX11-NEXT: s_cbranch_execz .LBB51_2 ; GFX11-NEXT: ; %bb.1: ; %cmp.true ; GFX11-NEXT: v_lshlrev_b32_e32 v33, 16, v17 ; GFX11-NEXT: v_lshlrev_b32_e32 v32, 16, v16 @@ -105172,7 +100224,7 @@ define <64 x i16> @bitcast_v64bf16_to_v64i16(<64 x bfloat> %a, i32 %b) { ; GFX11-NEXT: v_cndmask_b32_e32 v11, v85, v96, vcc_lo ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_perm_b32 v11, v11, v83, 0x7060302 -; GFX11-NEXT: .LBB52_2: ; %end +; GFX11-NEXT: .LBB51_2: ; %end ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -105370,7 +100422,7 @@ define <64 x bfloat> @bitcast_v64i16_to_v64bf16(<64 x i16> %a, i32 %b) { ; GCN-NEXT: ; implicit-def: $vgpr28 ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB53_2 +; GCN-NEXT: s_cbranch_execz .LBB52_2 ; GCN-NEXT: ; %bb.1: ; %cmp.false ; GCN-NEXT: v_lshlrev_b32_e32 v20, 16, v1 ; GCN-NEXT: v_lshlrev_b32_e32 v18, 16, v3 @@ -105460,9 +100512,9 @@ define <64 x bfloat> @bitcast_v64i16_to_v64bf16(<64 x i16> %a, i32 %b) { ; GCN-NEXT: ; implicit-def: $vgpr12 ; GCN-NEXT: ; implicit-def: $vgpr14 ; GCN-NEXT: ; implicit-def: $vgpr10 -; GCN-NEXT: .LBB53_2: ; %Flow +; GCN-NEXT: .LBB52_2: ; %Flow ; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB53_4 +; GCN-NEXT: s_cbranch_execz .LBB52_4 ; GCN-NEXT: ; %bb.3: ; %cmp.true ; GCN-NEXT: v_add_i32_e32 v10, vcc, 3, v10 ; GCN-NEXT: v_and_b32_e32 v10, 0xffff, v10 @@ -105767,7 +100819,7 @@ define <64 x bfloat> @bitcast_v64i16_to_v64bf16(<64 x i16> %a, i32 %b) { ; GCN-NEXT: v_lshlrev_b32_e32 v26, 16, v14 ; GCN-NEXT: v_and_b32_e32 v55, 0xffff0000, v10 ; GCN-NEXT: v_lshlrev_b32_e32 v28, 16, v10 -; GCN-NEXT: .LBB53_4: ; %end +; GCN-NEXT: .LBB52_4: ; %end ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: s_waitcnt expcnt(0) ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload @@ -106063,7 +101115,7 @@ define <64 x bfloat> @bitcast_v64i16_to_v64bf16(<64 x i16> %a, i32 %b) { ; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; VI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; VI-NEXT: s_cbranch_execz .LBB53_2 +; VI-NEXT: s_cbranch_execz .LBB52_2 ; VI-NEXT: ; %bb.1: ; %cmp.true ; VI-NEXT: v_mov_b32_e32 v32, 3 ; VI-NEXT: v_add_u16_sdwa v33, v15, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD @@ -106163,7 +101215,7 @@ define <64 x bfloat> @bitcast_v64i16_to_v64bf16(<64 x i16> %a, i32 %b) { ; VI-NEXT: v_add_u16_e32 v16, 3, v16 ; VI-NEXT: v_or_b32_e32 v17, v17, v33 ; VI-NEXT: v_or_b32_e32 v16, v16, v32 -; VI-NEXT: .LBB53_2: ; %end +; VI-NEXT: .LBB52_2: ; %end ; VI-NEXT: s_or_b64 exec, exec, s[4:5] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] @@ -106178,7 +101230,7 @@ define <64 x bfloat> @bitcast_v64i16_to_v64bf16(<64 x i16> %a, i32 %b) { ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GFX9-NEXT: s_cbranch_execz .LBB53_2 +; GFX9-NEXT: s_cbranch_execz .LBB52_2 ; GFX9-NEXT: ; %bb.1: ; %cmp.true ; GFX9-NEXT: v_pk_add_u16 v15, v15, 3 op_sel_hi:[1,0] ; GFX9-NEXT: v_pk_add_u16 v14, v14, 3 op_sel_hi:[1,0] @@ -106213,7 +101265,7 @@ define <64 x bfloat> @bitcast_v64i16_to_v64bf16(<64 x i16> %a, i32 %b) { ; GFX9-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] ; GFX9-NEXT: v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0] ; GFX9-NEXT: v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0] -; GFX9-NEXT: .LBB53_2: ; %end +; GFX9-NEXT: .LBB52_2: ; %end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -106230,7 +101282,7 @@ define <64 x bfloat> @bitcast_v64i16_to_v64bf16(<64 x i16> %a, i32 %b) { ; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-NEXT: s_cbranch_execz .LBB53_2 +; GFX11-NEXT: s_cbranch_execz .LBB52_2 ; GFX11-NEXT: ; %bb.1: ; %cmp.true ; GFX11-NEXT: v_pk_add_u16 v15, v15, 3 op_sel_hi:[1,0] ; GFX11-NEXT: v_pk_add_u16 v14, v14, 3 op_sel_hi:[1,0] @@ -106265,7 +101317,7 @@ define <64 x bfloat> @bitcast_v64i16_to_v64bf16(<64 x i16> %a, i32 %b) { ; GFX11-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] ; GFX11-NEXT: v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0] ; GFX11-NEXT: v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0] -; GFX11-NEXT: .LBB53_2: ; %end +; GFX11-NEXT: .LBB52_2: ; %end ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -106453,7 +101505,7 @@ define <64 x i16> @bitcast_v64f16_to_v64i16(<64 x half> %a, i32 %b) { ; GCN-NEXT: v_mov_b32_e32 v36, v5 ; GCN-NEXT: v_mov_b32_e32 v46, v6 ; GCN-NEXT: s_xor_b64 exec, exec, s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB54_2 +; GCN-NEXT: s_cbranch_execz .LBB53_2 ; GCN-NEXT: ; %bb.1: ; %cmp.true ; GCN-NEXT: v_cvt_f32_f16_e32 v12, v12 ; GCN-NEXT: v_cvt_f32_f16_e32 v9, v9 @@ -106786,7 +101838,7 @@ define <64 x i16> @bitcast_v64f16_to_v64i16(<64 x half> %a, i32 %b) { ; GCN-NEXT: v_alignbit_b32 v43, v14, v43, 16 ; GCN-NEXT: v_alignbit_b32 v40, v11, v40, 16 ; GCN-NEXT: v_alignbit_b32 v55, v9, v55, 16 -; GCN-NEXT: .LBB54_2: ; %end +; GCN-NEXT: .LBB53_2: ; %end ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v30, v1 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload @@ -107008,7 +102060,7 @@ define <64 x i16> @bitcast_v64f16_to_v64i16(<64 x half> %a, i32 %b) { ; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; VI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; VI-NEXT: s_cbranch_execz .LBB54_2 +; VI-NEXT: s_cbranch_execz .LBB53_2 ; VI-NEXT: ; %bb.1: ; %cmp.true ; VI-NEXT: v_mov_b32_e32 v32, 0x200 ; VI-NEXT: v_add_f16_e32 v33, 0x200, v15 @@ -107108,7 +102160,7 @@ define <64 x i16> @bitcast_v64f16_to_v64i16(<64 x half> %a, i32 %b) { ; VI-NEXT: v_add_f16_e32 v16, 0x200, v16 ; VI-NEXT: v_or_b32_e32 v17, v33, v17 ; VI-NEXT: v_or_b32_e32 v16, v16, v32 -; VI-NEXT: .LBB54_2: ; %end +; VI-NEXT: .LBB53_2: ; %end ; VI-NEXT: s_or_b64 exec, exec, s[4:5] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] @@ -107123,7 +102175,7 @@ define <64 x i16> @bitcast_v64f16_to_v64i16(<64 x half> %a, i32 %b) { ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GFX9-NEXT: s_cbranch_execz .LBB54_2 +; GFX9-NEXT: s_cbranch_execz .LBB53_2 ; GFX9-NEXT: ; %bb.1: ; %cmp.true ; GFX9-NEXT: s_movk_i32 s6, 0x200 ; GFX9-NEXT: v_pk_add_f16 v15, v15, s6 op_sel_hi:[1,0] @@ -107159,7 +102211,7 @@ define <64 x i16> @bitcast_v64f16_to_v64i16(<64 x half> %a, i32 %b) { ; GFX9-NEXT: v_pk_add_f16 v18, v18, s6 op_sel_hi:[1,0] ; GFX9-NEXT: v_pk_add_f16 v17, v17, s6 op_sel_hi:[1,0] ; GFX9-NEXT: v_pk_add_f16 v16, v16, s6 op_sel_hi:[1,0] -; GFX9-NEXT: .LBB54_2: ; %end +; GFX9-NEXT: .LBB53_2: ; %end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -107176,7 +102228,7 @@ define <64 x i16> @bitcast_v64f16_to_v64i16(<64 x half> %a, i32 %b) { ; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-NEXT: s_cbranch_execz .LBB54_2 +; GFX11-NEXT: s_cbranch_execz .LBB53_2 ; GFX11-NEXT: ; %bb.1: ; %cmp.true ; GFX11-NEXT: v_pk_add_f16 v15, 0x200, v15 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_add_f16 v14, 0x200, v14 op_sel_hi:[0,1] @@ -107211,7 +102263,7 @@ define <64 x i16> @bitcast_v64f16_to_v64i16(<64 x half> %a, i32 %b) { ; GFX11-NEXT: v_pk_add_f16 v18, 0x200, v18 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_add_f16 v17, 0x200, v17 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_add_f16 v16, 0x200, v16 op_sel_hi:[0,1] -; GFX11-NEXT: .LBB54_2: ; %end +; GFX11-NEXT: .LBB53_2: ; %end ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -107424,7 +102476,7 @@ define <64 x half> @bitcast_v64i16_to_v64f16(<64 x i16> %a, i32 %b) { ; GCN-NEXT: ; kill: killed $vgpr1 ; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB55_2 +; GCN-NEXT: s_cbranch_execz .LBB54_2 ; GCN-NEXT: ; %bb.1: ; %cmp.false ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -107687,9 +102739,9 @@ define <64 x half> @bitcast_v64i16_to_v64f16(<64 x i16> %a, i32 %b) { ; GCN-NEXT: ; implicit-def: $vgpr44 ; GCN-NEXT: ; implicit-def: $vgpr45 ; GCN-NEXT: ; implicit-def: $vgpr46 -; GCN-NEXT: .LBB55_2: ; %Flow +; GCN-NEXT: .LBB54_2: ; %Flow ; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GCN-NEXT: s_cbranch_execz .LBB55_4 +; GCN-NEXT: s_cbranch_execz .LBB54_4 ; GCN-NEXT: ; %bb.3: ; %cmp.true ; GCN-NEXT: s_waitcnt expcnt(0) ; GCN-NEXT: v_add_i32_e32 v1, vcc, 3, v46 @@ -107963,7 +103015,7 @@ define <64 x half> @bitcast_v64i16_to_v64f16(<64 x i16> %a, i32 %b) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cvt_f32_f16_e32 v1, v1 ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill -; GCN-NEXT: .LBB55_4: ; %end +; GCN-NEXT: .LBB54_4: ; %end ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: s_waitcnt expcnt(0) ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload @@ -108317,7 +103369,7 @@ define <64 x half> @bitcast_v64i16_to_v64f16(<64 x i16> %a, i32 %b) { ; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc ; VI-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; VI-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; VI-NEXT: s_cbranch_execz .LBB55_2 +; VI-NEXT: s_cbranch_execz .LBB54_2 ; VI-NEXT: ; %bb.1: ; %cmp.true ; VI-NEXT: v_mov_b32_e32 v32, 3 ; VI-NEXT: v_add_u16_sdwa v33, v15, v32 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD @@ -108417,7 +103469,7 @@ define <64 x half> @bitcast_v64i16_to_v64f16(<64 x i16> %a, i32 %b) { ; VI-NEXT: v_add_u16_e32 v16, 3, v16 ; VI-NEXT: v_or_b32_e32 v17, v17, v33 ; VI-NEXT: v_or_b32_e32 v16, v16, v32 -; VI-NEXT: .LBB55_2: ; %end +; VI-NEXT: .LBB54_2: ; %end ; VI-NEXT: s_or_b64 exec, exec, s[4:5] ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] @@ -108432,7 +103484,7 @@ define <64 x half> @bitcast_v64i16_to_v64f16(<64 x i16> %a, i32 %b) { ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc ; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5] ; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] -; GFX9-NEXT: s_cbranch_execz .LBB55_2 +; GFX9-NEXT: s_cbranch_execz .LBB54_2 ; GFX9-NEXT: ; %bb.1: ; %cmp.true ; GFX9-NEXT: v_pk_add_u16 v15, v15, 3 op_sel_hi:[1,0] ; GFX9-NEXT: v_pk_add_u16 v14, v14, 3 op_sel_hi:[1,0] @@ -108467,7 +103519,7 @@ define <64 x half> @bitcast_v64i16_to_v64f16(<64 x i16> %a, i32 %b) { ; GFX9-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] ; GFX9-NEXT: v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0] ; GFX9-NEXT: v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0] -; GFX9-NEXT: .LBB55_2: ; %end +; GFX9-NEXT: .LBB54_2: ; %end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -108484,7 +103536,7 @@ define <64 x half> @bitcast_v64i16_to_v64f16(<64 x i16> %a, i32 %b) { ; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_not1_saveexec_b32 s0, s0 -; GFX11-NEXT: s_cbranch_execz .LBB55_2 +; GFX11-NEXT: s_cbranch_execz .LBB54_2 ; GFX11-NEXT: ; %bb.1: ; %cmp.true ; GFX11-NEXT: v_pk_add_u16 v15, v15, 3 op_sel_hi:[1,0] ; GFX11-NEXT: v_pk_add_u16 v14, v14, 3 op_sel_hi:[1,0] @@ -108519,7 +103571,7 @@ define <64 x half> @bitcast_v64i16_to_v64f16(<64 x i16> %a, i32 %b) { ; GFX11-NEXT: v_pk_add_u16 v18, v18, 3 op_sel_hi:[1,0] ; GFX11-NEXT: v_pk_add_u16 v17, v17, 3 op_sel_hi:[1,0] ; GFX11-NEXT: v_pk_add_u16 v16, v16, 3 op_sel_hi:[1,0] -; GFX11-NEXT: .LBB55_2: ; %end +; GFX11-NEXT: .LBB54_2: ; %end ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll index 03c6a36ac9861..c2cac55e13b09 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <4 x float> @bitcast_v4i32_to_v4f32(<4 x i32> %a, i32 %b) { ; GCN-LABEL: bitcast_v4i32_to_v4f32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.160bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.160bit.ll index e7262375fbeb0..ee6f2708990bb 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.160bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.160bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <5 x float> @bitcast_v5i32_to_v5f32(<5 x i32> %a, i32 %b) { ; GCN-LABEL: bitcast_v5i32_to_v5f32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll index 1185a12a474ea..b3d9e61b65b6f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define half @bitcast_i16_to_f16(i16 %a, i32 %b) { ; GCN-LABEL: bitcast_i16_to_f16: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.192bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.192bit.ll index 61f9232ea50a1..ecaf64567500f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.192bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.192bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <6 x float> @bitcast_v6i32_to_v6f32(<6 x i32> %a, i32 %b) { ; GCN-LABEL: bitcast_v6i32_to_v6f32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.224bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.224bit.ll index 952be022750a6..d2e46475487c2 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.224bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.224bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <7 x float> @bitcast_v7i32_to_v7f32(<7 x i32> %a, i32 %b) { ; GCN-LABEL: bitcast_v7i32_to_v7f32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll index 6e7b5dd33ea0b..b52d8a89035bc 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <8 x float> @bitcast_v8i32_to_v8f32(<8 x i32> %a, i32 %b) { ; GCN-LABEL: bitcast_v8i32_to_v8f32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.288bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.288bit.ll index 0adf547e19362..76f16189b7f97 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.288bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.288bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <9 x float> @bitcast_v9i32_to_v9f32(<9 x i32> %a, i32 %b) { ; GCN-LABEL: bitcast_v9i32_to_v9f32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll index f1e0c19f7fca3..05412efc100f6 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <10 x float> @bitcast_v10i32_to_v10f32(<10 x i32> %a, i32 %b) { ; GCN-LABEL: bitcast_v10i32_to_v10f32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll index 1c51395128917..332c971e5709f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define float @bitcast_i32_to_f32(i32 %a, i32 %b) { ; GCN-LABEL: bitcast_i32_to_f32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.352bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.352bit.ll index 922a47ea77fcd..fa1f3bd96ad2f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.352bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.352bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <11 x float> @bitcast_v11i32_to_v11f32(<11 x i32> %a, i32 %b) { ; GCN-LABEL: bitcast_v11i32_to_v11f32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.384bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.384bit.ll index f67af98fba0fa..c7af60f324892 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.384bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.384bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <12 x float> @bitcast_v12i32_to_v12f32(<12 x i32> %a, i32 %b) { ; GCN-LABEL: bitcast_v12i32_to_v12f32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.448bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.448bit.ll index a5764f9da3194..b0fa6a21cd5f1 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.448bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.448bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <14 x i32> @bitcast_v7i64_to_v14i32(<7 x i64> %a, i32 %b) { ; GCN-LABEL: bitcast_v7i64_to_v14i32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.48bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.48bit.ll index d09aaf12161a2..9bb360f2e3b09 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.48bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.48bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <3 x half> @bitcast_v3bf16_to_v3f16(<3 x bfloat> %a, i32 %b) { ; GCN-LABEL: bitcast_v3bf16_to_v3f16: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll index f0ce1784eb107..0d1008082f586 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <16 x float> @bitcast_v16i32_to_v16f32(<16 x i32> %a, i32 %b) { ; GCN-LABEL: bitcast_v16i32_to_v16f32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll index 78f611d83b532..110c6109b1556 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.576bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <18 x i32> @bitcast_v9i64_to_v18i32(<9 x i64> %a, i32 %b) { ; GCN-LABEL: bitcast_v9i64_to_v18i32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll index 8d2501e42f2d1..bea2243e8087c 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.640bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <20 x i32> @bitcast_v10i64_to_v20i32(<10 x i64> %a, i32 %b) { ; GCN-LABEL: bitcast_v10i64_to_v20i32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll index 540888ab607b0..26ce1771e220d 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define double @bitcast_i64_to_f64(i64 %a, i32 %b) { ; GCN-LABEL: bitcast_i64_to_f64: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll index 702c1d05a0e3e..05fb285362f09 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.704bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <22 x i32> @bitcast_v11i64_to_v22i32(<11 x i64> %a, i32 %b) { ; GCN-LABEL: bitcast_v11i64_to_v22i32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll index 182c63502d77b..45e9c321d4aac 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.768bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <24 x i32> @bitcast_v12i64_to_v24i32(<12 x i64> %a, i32 %b) { ; GCN-LABEL: bitcast_v12i64_to_v24i32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll index 9869dca91b4d4..4dcfaee680984 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.832bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <26 x i32> @bitcast_v13i64_to_v26i32(<13 x i64> %a, i32 %b) { ; GCN-LABEL: bitcast_v13i64_to_v26i32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll index 9f1a9c8dc89c3..37cf5b81b81e3 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.896bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <28 x i32> @bitcast_v14i64_to_v28i32(<14 x i64> %a, i32 %b) { ; GCN-LABEL: bitcast_v14i64_to_v28i32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll index 80e5a18631189..ca0c0bb0d4ca2 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.960bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <30 x i32> @bitcast_v15i64_to_v30i32(<15 x i64> %a, i32 %b) { ; GCN-LABEL: bitcast_v15i64_to_v30i32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll index 7a3609e29a0c5..b87e7b0916032 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define <3 x float> @bitcast_v3i32_to_v3f32(<3 x i32> %a, i32 %b) { ; GCN-LABEL: bitcast_v3i32_to_v3f32: diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ptr.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ptr.ll index eb4429958dfa5..01a1e6b73ac6a 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ptr.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s define amdgpu_kernel void @bitcast_i8ptr_v16i8ptr(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GCN-LABEL: bitcast_i8ptr_v16i8ptr: