@@ -587,34 +587,63 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i
587587; FLATSCR_GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
588588; FLATSCR_GFX10-NEXT: s_endpgm
589589;
590- ; GFX11-LABEL: vload2_private:
591- ; GFX11: ; %bb.0: ; %entry
592- ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
593- ; GFX11-NEXT: v_mov_b32_e32 v2, 0
594- ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
595- ; GFX11-NEXT: global_load_u16 v0, v2, s[0:1]
596- ; GFX11-NEXT: s_waitcnt vmcnt(0)
597- ; GFX11-NEXT: scratch_store_b16 off, v0, off dlc
598- ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
599- ; GFX11-NEXT: global_load_u16 v0, v2, s[0:1] offset:2
600- ; GFX11-NEXT: s_waitcnt vmcnt(0)
601- ; GFX11-NEXT: scratch_store_b16 off, v0, off offset:2 dlc
602- ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
603- ; GFX11-NEXT: global_load_u16 v0, v2, s[0:1] offset:4
604- ; GFX11-NEXT: s_waitcnt vmcnt(0)
605- ; GFX11-NEXT: scratch_store_b16 off, v0, off offset:4 dlc
606- ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
607- ; GFX11-NEXT: s_clause 0x1
608- ; GFX11-NEXT: scratch_load_u16 v0, off, off offset:2
609- ; GFX11-NEXT: scratch_load_u16 v3, off, off
610- ; GFX11-NEXT: s_waitcnt vmcnt(1)
611- ; GFX11-NEXT: v_mov_b32_e32 v1, v0
612- ; GFX11-NEXT: s_waitcnt vmcnt(0)
613- ; GFX11-NEXT: v_perm_b32 v0, v0, v3, 0x5040100
614- ; GFX11-NEXT: scratch_load_d16_hi_b16 v1, off, off offset:4
615- ; GFX11-NEXT: s_waitcnt vmcnt(0)
616- ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3]
617- ; GFX11-NEXT: s_endpgm
590+ ; GFX11-TRUE16-LABEL: vload2_private:
591+ ; GFX11-TRUE16: ; %bb.0: ; %entry
592+ ; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
593+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0
594+ ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
595+ ; GFX11-TRUE16-NEXT: global_load_u16 v0, v2, s[0:1]
596+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
597+ ; GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, off dlc
598+ ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
599+ ; GFX11-TRUE16-NEXT: global_load_u16 v0, v2, s[0:1] offset:2
600+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
601+ ; GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, off offset:2 dlc
602+ ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
603+ ; GFX11-TRUE16-NEXT: global_load_u16 v0, v2, s[0:1] offset:4
604+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
605+ ; GFX11-TRUE16-NEXT: scratch_store_b16 off, v0, off offset:4 dlc
606+ ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
607+ ; GFX11-TRUE16-NEXT: s_clause 0x1
608+ ; GFX11-TRUE16-NEXT: scratch_load_u16 v3, off, off offset:2
609+ ; GFX11-TRUE16-NEXT: scratch_load_u16 v0, off, off
610+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(1)
611+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v3
612+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
613+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
614+ ; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v1, off, off offset:4
615+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
616+ ; GFX11-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[2:3]
617+ ; GFX11-TRUE16-NEXT: s_endpgm
618+ ;
619+ ; GFX11-FAKE16-LABEL: vload2_private:
620+ ; GFX11-FAKE16: ; %bb.0: ; %entry
621+ ; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
622+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0
623+ ; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
624+ ; GFX11-FAKE16-NEXT: global_load_u16 v0, v2, s[0:1]
625+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
626+ ; GFX11-FAKE16-NEXT: scratch_store_b16 off, v0, off dlc
627+ ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
628+ ; GFX11-FAKE16-NEXT: global_load_u16 v0, v2, s[0:1] offset:2
629+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
630+ ; GFX11-FAKE16-NEXT: scratch_store_b16 off, v0, off offset:2 dlc
631+ ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
632+ ; GFX11-FAKE16-NEXT: global_load_u16 v0, v2, s[0:1] offset:4
633+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
634+ ; GFX11-FAKE16-NEXT: scratch_store_b16 off, v0, off offset:4 dlc
635+ ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
636+ ; GFX11-FAKE16-NEXT: s_clause 0x1
637+ ; GFX11-FAKE16-NEXT: scratch_load_u16 v0, off, off offset:2
638+ ; GFX11-FAKE16-NEXT: scratch_load_u16 v3, off, off
639+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(1)
640+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, v0
641+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
642+ ; GFX11-FAKE16-NEXT: v_perm_b32 v0, v0, v3, 0x5040100
643+ ; GFX11-FAKE16-NEXT: scratch_load_d16_hi_b16 v1, off, off offset:4
644+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
645+ ; GFX11-FAKE16-NEXT: global_store_b64 v2, v[0:1], s[2:3]
646+ ; GFX11-FAKE16-NEXT: s_endpgm
618647entry:
619648 %loc = alloca [3 x i16 ], align 2 , addrspace (5 )
620649 %tmp = load i16 , ptr addrspace (1 ) %in , align 2
@@ -968,16 +997,27 @@ define <2 x i16> @chain_hi_to_lo_group_may_alias_store(ptr addrspace(3) %ptr, pt
968997; GFX10-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
969998; GFX10-NEXT: s_setpc_b64 s[30:31]
970999;
971- ; GFX11-LABEL: chain_hi_to_lo_group_may_alias_store:
972- ; GFX11: ; %bb.0: ; %bb
973- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
974- ; GFX11-NEXT: v_mov_b32_e32 v2, 0x7b
975- ; GFX11-NEXT: ds_load_u16 v3, v0
976- ; GFX11-NEXT: ds_store_b16 v1, v2
977- ; GFX11-NEXT: ds_load_u16 v0, v0 offset:2
978- ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
979- ; GFX11-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
980- ; GFX11-NEXT: s_setpc_b64 s[30:31]
1000+ ; GFX11-TRUE16-LABEL: chain_hi_to_lo_group_may_alias_store:
1001+ ; GFX11-TRUE16: ; %bb.0: ; %bb
1002+ ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1003+ ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0x7b
1004+ ; GFX11-TRUE16-NEXT: ds_load_u16 v3, v0
1005+ ; GFX11-TRUE16-NEXT: ds_store_b16 v1, v2
1006+ ; GFX11-TRUE16-NEXT: ds_load_u16 v0, v0 offset:2
1007+ ; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1008+ ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v3.l
1009+ ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
1010+ ;
1011+ ; GFX11-FAKE16-LABEL: chain_hi_to_lo_group_may_alias_store:
1012+ ; GFX11-FAKE16: ; %bb.0: ; %bb
1013+ ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1014+ ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0x7b
1015+ ; GFX11-FAKE16-NEXT: ds_load_u16 v3, v0
1016+ ; GFX11-FAKE16-NEXT: ds_store_b16 v1, v2
1017+ ; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:2
1018+ ; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1019+ ; GFX11-FAKE16-NEXT: v_perm_b32 v0, v3, v0, 0x5040100
1020+ ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
9811021bb:
9821022 %gep_lo = getelementptr inbounds i16 , ptr addrspace (3 ) %ptr , i64 1
9831023 %load_hi = load i16 , ptr addrspace (3 ) %ptr
0 commit comments