Skip to content

Commit 44fc1ee

Browse files
committed
[Transform][LoadStoreVectorizer] fix tests after allowing partial redundant
1 parent ec6cce8, commit 44fc1ee

File tree

2 files changed

+90
-108
lines changed

2 files changed

+90
-108
lines changed

llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll

Lines changed: 9 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -593,14 +593,10 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i
593593
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
594594
; FLATSCR-NEXT: scratch_store_short off, v0, s0 offset:4
595595
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
596-
; FLATSCR-NEXT: scratch_load_ushort v0, off, s0 offset:2
597-
; FLATSCR-NEXT: scratch_load_ushort v3, off, s0
598-
; FLATSCR-NEXT: s_waitcnt vmcnt(1)
599-
; FLATSCR-NEXT: v_mov_b32_e32 v1, v0
596+
; FLATSCR-NEXT: scratch_load_dword v0, off, s0
597+
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
598+
; FLATSCR-NEXT: v_lshrrev_b32_e32 v1, 16, v0
600599
; FLATSCR-NEXT: scratch_load_short_d16_hi v1, off, s0 offset:4
601-
; FLATSCR-NEXT: s_mov_b32 s0, 0x5040100
602-
; FLATSCR-NEXT: s_waitcnt vmcnt(1)
603-
; FLATSCR-NEXT: v_perm_b32 v0, v0, v3, s0
604600
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
605601
; FLATSCR-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
606602
; FLATSCR-NEXT: s_endpgm
@@ -660,13 +656,9 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i
660656
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0)
661657
; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s0 offset:4
662658
; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
663-
; FLATSCR_GFX10-NEXT: s_clause 0x1
664-
; FLATSCR_GFX10-NEXT: scratch_load_ushort v0, off, s0 offset:2
665-
; FLATSCR_GFX10-NEXT: scratch_load_ushort v3, off, s0
666-
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(1)
667-
; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v1, v0
659+
; FLATSCR_GFX10-NEXT: scratch_load_dword v0, off, s0
668660
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0)
669-
; FLATSCR_GFX10-NEXT: v_perm_b32 v0, v0, v3, 0x5040100
661+
; FLATSCR_GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v0
670662
; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v1, off, s0 offset:4
671663
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0)
672664
; FLATSCR_GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
@@ -689,12 +681,9 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i
689681
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
690682
; GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, off offset:4 dlc
691683
; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
692-
; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v3, off, off offset:2
684+
; GFX11-TRUE16-NEXT: scratch_load_b32 v0, off, off
693685
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
694-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v3
695-
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
696-
; GFX11-TRUE16-NEXT: s_clause 0x1
697-
; GFX11-TRUE16-NEXT: scratch_load_d16_b16 v0, off, off
686+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
698687
; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v1, off, off offset:4
699688
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
700689
; GFX11-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[2:3]
@@ -717,13 +706,9 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i
717706
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
718707
; GFX11-FAKE16-NEXT: scratch_store_b16 off, v0, off offset:4 dlc
719708
; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
720-
; GFX11-FAKE16-NEXT: s_clause 0x1
721-
; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, off offset:2
722-
; GFX11-FAKE16-NEXT: scratch_load_u16 v3, off, off
723-
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
724-
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, v0
709+
; GFX11-FAKE16-NEXT: scratch_load_b32 v0, off, off
725710
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
726-
; GFX11-FAKE16-NEXT: v_perm_b32 v0, v0, v3, 0x5040100
711+
; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
727712
; GFX11-FAKE16-NEXT: scratch_load_d16_hi_b16 v1, off, off offset:4
728713
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
729714
; GFX11-FAKE16-NEXT: global_store_b64 v2, v[0:1], s[2:3]

0 commit comments

Comments
 (0)