@@ -681,61 +681,72 @@ body: |
681681 ; GFX6-LABEL: name: load_global_s128
682682 ; GFX6: liveins: $vgpr0_vgpr1
683683 ; GFX6-NEXT: {{ $}}
684- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
685- ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
686- ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
684+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
685+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
686+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
687+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
688+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
689+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
690+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
691+ ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
687692 ;
688693 ; GFX7-LABEL: name: load_global_s128
689694 ; GFX7: liveins: $vgpr0_vgpr1
690695 ; GFX7-NEXT: {{ $}}
691- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
692- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
693- ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
696+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
697+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
698+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
699+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
700+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
701+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
702+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
703+ ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
694704 ;
695705 ; GFX7-FLAT-LABEL: name: load_global_s128
696706 ; GFX7-FLAT: liveins: $vgpr0_vgpr1
697707 ; GFX7-FLAT-NEXT: {{ $}}
698- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
699- ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
700- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
708+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
709+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
710+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
701711 ;
702712 ; GFX8-LABEL: name: load_global_s128
703713 ; GFX8: liveins: $vgpr0_vgpr1
704714 ; GFX8-NEXT: {{ $}}
705- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
706- ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
707- ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
715+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
716+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
717+ ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
708718 ;
709719 ; GFX9-LABEL: name: load_global_s128
710720 ; GFX9: liveins: $vgpr0_vgpr1
711721 ; GFX9-NEXT: {{ $}}
712- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
713- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
714- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
722+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
723+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
724+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
715725 ;
716726 ; GFX10-LABEL: name: load_global_s128
717727 ; GFX10: liveins: $vgpr0_vgpr1
718728 ; GFX10-NEXT: {{ $}}
719- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
720- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
721- ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
729+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
730+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
731+ ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
722732 ;
723733 ; GFX11-LABEL: name: load_global_s128
724734 ; GFX11: liveins: $vgpr0_vgpr1
725735 ; GFX11-NEXT: {{ $}}
726- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
727- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
728- ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
736+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
737+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
738+ ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
729739 ;
730740 ; GFX12-LABEL: name: load_global_s128
731741 ; GFX12: liveins: $vgpr0_vgpr1
732742 ; GFX12-NEXT: {{ $}}
733- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
734- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p1) :: (load (s128), align 4, addrspace 1)
735- ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
743+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
744+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
745+ ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
736746 %0:vgpr(p1) = COPY $vgpr0_vgpr1
737- %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 4, addrspace 1)
738- $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
747+ %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1)
748+ %2:vgpr(s128) = G_BITCAST %1(<4 x s32>)
749+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
739750
740751 ...
741752
@@ -989,61 +1000,72 @@ body: |
9891000 ; GFX6-LABEL: name: load_global_v2p3
9901001 ; GFX6: liveins: $vgpr0_vgpr1
9911002 ; GFX6-NEXT: {{ $}}
992- ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
993- ; GFX6-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
994- ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1003+ ; GFX6-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1004+ ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1005+ ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1006+ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
1007+ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1008+ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
1009+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
1010+ ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
9951011 ;
9961012 ; GFX7-LABEL: name: load_global_v2p3
9971013 ; GFX7: liveins: $vgpr0_vgpr1
9981014 ; GFX7-NEXT: {{ $}}
999- ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1000- ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1001- ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1015+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1016+ ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1017+ ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
1018+ ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
1019+ ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
1020+ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
1021+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
1022+ ; GFX7-NEXT: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
10021023 ;
10031024 ; GFX7-FLAT-LABEL: name: load_global_v2p3
10041025 ; GFX7-FLAT: liveins: $vgpr0_vgpr1
10051026 ; GFX7-FLAT-NEXT: {{ $}}
1006- ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1007- ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1008- ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1027+ ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1028+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1)
1029+ ; GFX7-FLAT-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
10091030 ;
10101031 ; GFX8-LABEL: name: load_global_v2p3
10111032 ; GFX8: liveins: $vgpr0_vgpr1
10121033 ; GFX8-NEXT: {{ $}}
1013- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1014- ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1015- ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1034+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1035+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<2 x s32>), addrspace 1)
1036+ ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
10161037 ;
10171038 ; GFX9-LABEL: name: load_global_v2p3
10181039 ; GFX9: liveins: $vgpr0_vgpr1
10191040 ; GFX9-NEXT: {{ $}}
1020- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1021- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1022- ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1041+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1042+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
1043+ ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
10231044 ;
10241045 ; GFX10-LABEL: name: load_global_v2p3
10251046 ; GFX10: liveins: $vgpr0_vgpr1
10261047 ; GFX10-NEXT: {{ $}}
1027- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1028- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1029- ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1048+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1049+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
1050+ ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
10301051 ;
10311052 ; GFX11-LABEL: name: load_global_v2p3
10321053 ; GFX11: liveins: $vgpr0_vgpr1
10331054 ; GFX11-NEXT: {{ $}}
1034- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1035- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1036- ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1055+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1056+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
1057+ ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
10371058 ;
10381059 ; GFX12-LABEL: name: load_global_v2p3
10391060 ; GFX12: liveins: $vgpr0_vgpr1
10401061 ; GFX12-NEXT: {{ $}}
1041- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1042- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load (<2 x p3>), addrspace 1)
1043- ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
1062+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1063+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load (<2 x s32>), addrspace 1)
1064+ ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
10441065 %0:vgpr(p1) = COPY $vgpr0_vgpr1
1045- %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 8, addrspace 1)
1046- $vgpr0_vgpr1 = COPY %1
1066+ %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 8, addrspace 1)
1067+ %2:vgpr(<2 x p3>) = G_BITCAST %1(<2 x s32>)
1068+ $vgpr0_vgpr1 = COPY %2
10471069
10481070 ...
10491071
@@ -1231,7 +1253,7 @@ body: |
12311253 ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
12321254 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
12331255 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
1234- ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1)
1256+ ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
12351257 ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
12361258 ;
12371259 ; GFX7-LABEL: name: load_global_v8s16
@@ -1243,52 +1265,53 @@ body: |
12431265 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
12441266 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
12451267 ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
1246- ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1)
1268+ ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
12471269 ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
12481270 ;
12491271 ; GFX7-FLAT-LABEL: name: load_global_v8s16
12501272 ; GFX7-FLAT: liveins: $vgpr0_vgpr1
12511273 ; GFX7-FLAT-NEXT: {{ $}}
12521274 ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1253- ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1)
1275+ ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
12541276 ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
12551277 ;
12561278 ; GFX8-LABEL: name: load_global_v8s16
12571279 ; GFX8: liveins: $vgpr0_vgpr1
12581280 ; GFX8-NEXT: {{ $}}
1259- ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1260- ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
1261- ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
1281+ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1282+ ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<4 x s32>), align 4, addrspace 1)
1283+ ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
12621284 ;
12631285 ; GFX9-LABEL: name: load_global_v8s16
12641286 ; GFX9: liveins: $vgpr0_vgpr1
12651287 ; GFX9-NEXT: {{ $}}
1266- ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1267- ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
1268- ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
1288+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1289+ ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
1290+ ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
12691291 ;
12701292 ; GFX10-LABEL: name: load_global_v8s16
12711293 ; GFX10: liveins: $vgpr0_vgpr1
12721294 ; GFX10-NEXT: {{ $}}
1273- ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1274- ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
1275- ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
1295+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1296+ ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
1297+ ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
12761298 ;
12771299 ; GFX11-LABEL: name: load_global_v8s16
12781300 ; GFX11: liveins: $vgpr0_vgpr1
12791301 ; GFX11-NEXT: {{ $}}
1280- ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1281- ; GFX11-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
1282- ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
1302+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1303+ ; GFX11-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
1304+ ; GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
12831305 ;
12841306 ; GFX12-LABEL: name: load_global_v8s16
12851307 ; GFX12: liveins: $vgpr0_vgpr1
12861308 ; GFX12-NEXT: {{ $}}
1287- ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
1288- ; GFX12-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
1289- ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
1309+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
1310+ ; GFX12-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), align 4, addrspace 1)
1311+ ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
12901312 %0:vgpr(p1) = COPY $vgpr0_vgpr1
1291- %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 1)
1313+ %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 4, addrspace 1)
1314+ %2:vgpr(<8 x s16>) = G_BITCAST %1(<4 x s32>)
12921315 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
12931316
12941317 ...
0 commit comments