@@ -736,43 +736,37 @@ define amdgpu_kernel void @constant_load_v16i16_align2(ptr addrspace(4) %ptr0) #
736736; GFX12-TRUE16-LABEL: constant_load_v16i16_align2:
737737; GFX12-TRUE16: ; %bb.0: ; %entry
738738; GFX12-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
739- ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v9 , 0
739+ ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v8 , 0
740740; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0
741741; GFX12-TRUE16-NEXT: s_clause 0x7
742- ; GFX12-TRUE16-NEXT: global_load_d16_b16 v0, v9, s[0:1] offset:16
743- ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v0, v9, s[0:1] offset:12
744- ; GFX12-TRUE16-NEXT: global_load_d16_b16 v1, v9, s[0:1] offset:8
745- ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v1, v9, s[0:1] offset:4
746- ; GFX12-TRUE16-NEXT: global_load_d16_b16 v4, v9, s[0:1] offset:28
747- ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v4, v9, s[0:1] offset:24
748- ; GFX12-TRUE16-NEXT: global_load_d16_b16 v5, v9, s[0:1] offset:20
749- ; GFX12-TRUE16-NEXT: global_load_d16_b16 v8, v9, s[0:1]
750- ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x6
751- ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.h
752- ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x5
753- ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.l
742+ ; GFX12-TRUE16-NEXT: global_load_d16_b16 v3, v8, s[0:1] offset:28
743+ ; GFX12-TRUE16-NEXT: global_load_d16_b16 v2, v8, s[0:1] offset:24
744+ ; GFX12-TRUE16-NEXT: global_load_d16_b16 v1, v8, s[0:1] offset:20
745+ ; GFX12-TRUE16-NEXT: global_load_d16_b16 v0, v8, s[0:1] offset:16
746+ ; GFX12-TRUE16-NEXT: global_load_d16_b16 v7, v8, s[0:1] offset:12
747+ ; GFX12-TRUE16-NEXT: global_load_d16_b16 v6, v8, s[0:1] offset:8
748+ ; GFX12-TRUE16-NEXT: global_load_d16_b16 v5, v8, s[0:1] offset:4
749+ ; GFX12-TRUE16-NEXT: global_load_d16_b16 v4, v8, s[0:1]
750+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
751+ ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v3, v8, s[0:1] offset:30
752+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
753+ ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v2, v8, s[0:1] offset:26
754+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
755+ ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v1, v8, s[0:1] offset:22
756+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
757+ ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v0, v8, s[0:1] offset:18
758+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
759+ ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v7, v8, s[0:1] offset:14
760+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
761+ ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v6, v8, s[0:1] offset:10
762+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
763+ ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v5, v8, s[0:1] offset:6
764+ ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x7
765+ ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v4, v8, s[0:1] offset:2
754766; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x4
755- ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v1.l, v1.h
756- ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x3
757- ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v7.l, v4.l
758- ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x2
759- ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v6.l, v4.h
760- ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
761- ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
762- ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v8.l
763- ; GFX12-TRUE16-NEXT: s_clause 0x7
764- ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v7, v9, s[0:1] offset:30
765- ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v6, v9, s[0:1] offset:26
766- ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v5, v9, s[0:1] offset:22
767- ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v4, v9, s[0:1] offset:18
768- ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v3, v9, s[0:1] offset:14
769- ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v2, v9, s[0:1] offset:10
770- ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v1, v9, s[0:1] offset:6
771- ; GFX12-TRUE16-NEXT: global_load_d16_hi_b16 v0, v9, s[0:1] offset:2
767+ ; GFX12-TRUE16-NEXT: global_store_b128 v[0:1], v[0:3], off
772768; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0
773- ; GFX12-TRUE16-NEXT: s_clause 0x1
774769; GFX12-TRUE16-NEXT: global_store_b128 v[0:1], v[4:7], off
775- ; GFX12-TRUE16-NEXT: global_store_b128 v[0:1], v[0:3], off
776770; GFX12-TRUE16-NEXT: s_endpgm
777771;
778772; GFX12-FAKE16-LABEL: constant_load_v16i16_align2:
0 commit comments