diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/external-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/external-shuffle.ll
new file mode 100644
index 0000000000000..ce9e47a03dee3
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/external-shuffle.ll
@@ -0,0 +1,261 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,dce < %s | FileCheck -check-prefixes=GCN %s
+
+; The insertelement chains in the exit block are external users of scalars from various parts of the vectorized tree. Each chain only rebuilds, lane by lane,
+; a vector that vectorization already produced (i.e. an identity vector), so these external uses should not increase the cost of vectorization.
+; NOTE(review): every shufflevector CHECK line below ends at `<16 x i32>` with no mask operand, so the masks are not verified - confirm this is intended or regenerate the checks.
+define void @phi_4(ptr addrspace(3) %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr %out1, ptr %out2, i32 %flag) {
+; GCN-LABEL: define void @phi_4(
+; GCN-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], ptr [[OUT2:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0:[0-9]+]] {
+; GCN-NEXT: [[ENTRY:.*]]:
+; GCN-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(3) [[INPTR0]], align 8
+; GCN-NEXT: [[GEP2:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 2
+; GCN-NEXT: [[GEP3:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 3
+; GCN-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP2]], align 2
+; GCN-NEXT: [[GEP4:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 4
+; GCN-NEXT: [[GEP5:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 5
+; GCN-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP4]], align 8
+; GCN-NEXT: [[GEP6:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 6
+; GCN-NEXT: [[GEP7:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 7
+; GCN-NEXT: [[TMP3:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP6]], align 2
+; GCN-NEXT: [[GEP8:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 8
+; GCN-NEXT: [[GEP9:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 9
+; GCN-NEXT: [[TMP4:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP8]], align 8
+; GCN-NEXT: [[GEP10:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 10
+; GCN-NEXT: [[GEP11:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 11
+; GCN-NEXT: [[TMP5:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP10]], align 2
+; GCN-NEXT: [[GEP12:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 12
+; GCN-NEXT: [[GEP13:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 13
+; GCN-NEXT: [[TMP6:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP12]], align 8
+; GCN-NEXT: [[GEP14:%.*]] = getelementptr i16, ptr addrspace(3) [[INPTR0]], i32 14
+; GCN-NEXT: [[TMP7:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP14]], align 2
+; GCN-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[TMP1]], i32 0
+; GCN-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1
+; GCN-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0
+; GCN-NEXT: [[TMP11:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
+; GCN-NEXT: [[TMP12:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0
+; GCN-NEXT: [[TMP13:%.*]] = extractelement <2 x i16> [[TMP3]], i32 1
+; GCN-NEXT: [[TMP14:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
+; GCN-NEXT: [[TMP15:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
+; GCN-NEXT: [[TMP24:%.*]] = extractelement <2 x i16> [[TMP5]], i32 0
+; GCN-NEXT: [[TMP26:%.*]] = extractelement <2 x i16> [[TMP5]], i32 1
+; GCN-NEXT: [[TMP28:%.*]] = extractelement <2 x i16> [[TMP6]], i32 0
+; GCN-NEXT: [[TMP38:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1
+; GCN-NEXT: br label %[[DO_BODY:.*]]
+; GCN: [[DO_BODY]]:
+; GCN-NEXT: [[PHI2:%.*]] = phi i16 [ [[TMP8]], %[[ENTRY]] ], [ [[TMP30:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[PHI3:%.*]] = phi i16 [ [[TMP9]], %[[ENTRY]] ], [ [[OTHERELE3:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[PHI4:%.*]] = phi i16 [ [[TMP10]], %[[ENTRY]] ], [ [[TMP39:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[PHI5:%.*]] = phi i16 [ [[TMP11]], %[[ENTRY]] ], [ [[OTHERELE5:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[PHI6:%.*]] = phi i16 [ [[TMP12]], %[[ENTRY]] ], [ [[TMP32:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[PHI7:%.*]] = phi i16 [ [[TMP13]], %[[ENTRY]] ], [ [[OTHERELE7:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[PHI8:%.*]] = phi i16 [ [[TMP14]], %[[ENTRY]] ], [ [[TMP40:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[PHI9:%.*]] = phi i16 [ [[TMP15]], %[[ENTRY]] ], [ [[OTHERELE9:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[PHI10:%.*]] = phi i16 [ [[TMP24]], %[[ENTRY]] ], [ [[TMP34:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[PHI11:%.*]] = phi i16 [ [[TMP26]], %[[ENTRY]] ], [ [[OTHERELE11:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[PHI12:%.*]] = phi i16 [ [[TMP28]], %[[ENTRY]] ], [ [[TMP35:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[PHI13:%.*]] = phi i16 [ [[TMP38]], %[[ENTRY]] ], [ [[OTHERELE13:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[TMP41:%.*]] = phi <2 x i16> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP16:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[TMP42:%.*]] = phi <2 x i16> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP23:%.*]], %[[DO_BODY]] ]
+; GCN-NEXT: [[TMP16]] = load <2 x i16>, ptr addrspace(3) [[INPTR0]], align 8
+; GCN-NEXT: [[OTHERELE3]] = load i16, ptr addrspace(3) [[GEP3]], align 1
+; GCN-NEXT: [[TMP17:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP2]], align 2
+; GCN-NEXT: [[OTHERELE5]] = load i16, ptr addrspace(3) [[GEP5]], align 1
+; GCN-NEXT: [[TMP18:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP4]], align 8
+; GCN-NEXT: [[OTHERELE7]] = load i16, ptr addrspace(3) [[GEP7]], align 1
+; GCN-NEXT: [[TMP19:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP6]], align 2
+; GCN-NEXT: [[OTHERELE9]] = load i16, ptr addrspace(3) [[GEP9]], align 1
+; GCN-NEXT: [[TMP20:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP8]], align 8
+; GCN-NEXT: [[OTHERELE11]] = load i16, ptr addrspace(3) [[GEP11]], align 1
+; GCN-NEXT: [[TMP21:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP10]], align 2
+; GCN-NEXT: [[OTHERELE13]] = load i16, ptr addrspace(3) [[GEP13]], align 1
+; GCN-NEXT: [[TMP22:%.*]] = load <2 x i16>, ptr addrspace(3) [[GEP12]], align 8
+; GCN-NEXT: [[TMP23]] = load <2 x i16>, ptr addrspace(3) [[GEP14]], align 2
+; GCN-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
+; GCN-NEXT: [[TMP30]] = extractelement <2 x i16> [[TMP17]], i32 0
+; GCN-NEXT: [[TMP39]] = extractelement <2 x i16> [[TMP18]], i32 0
+; GCN-NEXT: [[TMP32]] = extractelement <2 x i16> [[TMP19]], i32 0
+; GCN-NEXT: [[TMP40]] = extractelement <2 x i16> [[TMP20]], i32 0
+; GCN-NEXT: [[TMP34]] = extractelement <2 x i16> [[TMP21]], i32 0
+; GCN-NEXT: [[TMP35]] = extractelement <2 x i16> [[TMP22]], i32 0
+; GCN-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
+; GCN: [[EXIT]]:
+; GCN-NEXT: [[TMP36:%.*]] = shufflevector <2 x i16> [[TMP16]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[TMP37:%.*]] = shufflevector <2 x i16> [[TMP17]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC038:%.*]] = shufflevector <16 x i16> [[TMP36]], <16 x i16> [[TMP37]], <16 x i32>
+; GCN-NEXT: [[TMP25:%.*]] = shufflevector <2 x i16> [[TMP18]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC059:%.*]] = shufflevector <16 x i16> [[VEC038]], <16 x i16> [[TMP25]], <16 x i32>
+; GCN-NEXT: [[TMP27:%.*]] = shufflevector <2 x i16> [[TMP19]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC0710:%.*]] = shufflevector <16 x i16> [[VEC059]], <16 x i16> [[TMP27]], <16 x i32>
+; GCN-NEXT: [[TMP29:%.*]] = shufflevector <2 x i16> [[TMP20]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC0911:%.*]] = shufflevector <16 x i16> [[VEC0710]], <16 x i16> [[TMP29]], <16 x i32>
+; GCN-NEXT: [[TMP31:%.*]] = shufflevector <2 x i16> [[TMP21]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC01112:%.*]] = shufflevector <16 x i16> [[VEC0911]], <16 x i16> [[TMP31]], <16 x i32>
+; GCN-NEXT: [[TMP33:%.*]] = shufflevector <2 x i16> [[TMP22]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[TMP58:%.*]] = shufflevector <16 x i16> [[VEC01112]], <16 x i16> [[TMP33]], <16 x i32>
+; GCN-NEXT: [[TMP60:%.*]] = shufflevector <2 x i16> [[TMP23]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC2157:%.*]] = shufflevector <16 x i16> [[TMP58]], <16 x i16> [[TMP60]], <16 x i32>
+; GCN-NEXT: [[TMP50:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[TMP51:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC231:%.*]] = shufflevector <16 x i16> [[TMP50]], <16 x i16> [[TMP51]], <16 x i32>
+; GCN-NEXT: [[TMP52:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC252:%.*]] = shufflevector <16 x i16> [[VEC231]], <16 x i16> [[TMP52]], <16 x i32>
+; GCN-NEXT: [[TMP53:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC273:%.*]] = shufflevector <16 x i16> [[VEC252]], <16 x i16> [[TMP53]], <16 x i32>
+; GCN-NEXT: [[TMP54:%.*]] = shufflevector <2 x i16> [[TMP4]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC294:%.*]] = shufflevector <16 x i16> [[VEC273]], <16 x i16> [[TMP54]], <16 x i32>
+; GCN-NEXT: [[TMP55:%.*]] = shufflevector <2 x i16> [[TMP5]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC2115:%.*]] = shufflevector <16 x i16> [[VEC294]], <16 x i16> [[TMP55]], <16 x i32>
+; GCN-NEXT: [[TMP56:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC2136:%.*]] = shufflevector <16 x i16> [[VEC2115]], <16 x i16> [[TMP56]], <16 x i32>
+; GCN-NEXT: [[TMP59:%.*]] = shufflevector <2 x i16> [[TMP7]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC2151:%.*]] = shufflevector <16 x i16> [[VEC2136]], <16 x i16> [[TMP59]], <16 x i32>
+; GCN-NEXT: [[TMP57:%.*]] = shufflevector <2 x i16> [[TMP41]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC22:%.*]] = insertelement <16 x i16> [[TMP57]], i16 [[PHI2]], i64 2
+; GCN-NEXT: [[VEC23:%.*]] = insertelement <16 x i16> [[VEC22]], i16 [[PHI3]], i64 3
+; GCN-NEXT: [[VEC24:%.*]] = insertelement <16 x i16> [[VEC23]], i16 [[PHI4]], i64 4
+; GCN-NEXT: [[VEC25:%.*]] = insertelement <16 x i16> [[VEC24]], i16 [[PHI5]], i64 5
+; GCN-NEXT: [[VEC26:%.*]] = insertelement <16 x i16> [[VEC25]], i16 [[PHI6]], i64 6
+; GCN-NEXT: [[VEC27:%.*]] = insertelement <16 x i16> [[VEC26]], i16 [[PHI7]], i64 7
+; GCN-NEXT: [[VEC28:%.*]] = insertelement <16 x i16> [[VEC27]], i16 [[PHI8]], i64 8
+; GCN-NEXT: [[VEC29:%.*]] = insertelement <16 x i16> [[VEC28]], i16 [[PHI9]], i64 9
+; GCN-NEXT: [[VEC210:%.*]] = insertelement <16 x i16> [[VEC29]], i16 [[PHI10]], i64 10
+; GCN-NEXT: [[VEC211:%.*]] = insertelement <16 x i16> [[VEC210]], i16 [[PHI11]], i64 11
+; GCN-NEXT: [[VEC212:%.*]] = insertelement <16 x i16> [[VEC211]], i16 [[PHI12]], i64 12
+; GCN-NEXT: [[VEC213:%.*]] = insertelement <16 x i16> [[VEC212]], i16 [[PHI13]], i64 13
+; GCN-NEXT: [[TMP61:%.*]] = shufflevector <2 x i16> [[TMP42]], <2 x i16> poison, <16 x i32>
+; GCN-NEXT: [[VEC2152:%.*]] = shufflevector <16 x i16> [[VEC213]], <16 x i16> [[TMP61]], <16 x i32>
+; GCN-NEXT: store <16 x i16> [[VEC2151]], ptr [[OUT]], align 32
+; GCN-NEXT: store <16 x i16> [[VEC2157]], ptr [[OUT1]], align 32
+; GCN-NEXT: store <16 x i16> [[VEC2152]], ptr [[OUT2]], align 32
+; GCN-NEXT: ret void
+;
+entry: ; Scalar-load 16 consecutive i16 elements (indices 0..15) starting at %inptr0.
+  %ele0 = load i16, ptr addrspace(3) %inptr0, align 8
+  %gep1 = getelementptr i16, ptr addrspace(3) %inptr0, i32 1
+  %ele1 = load i16, ptr addrspace(3) %gep1, align 1
+  %gep2 = getelementptr i16, ptr addrspace(3) %inptr0, i32 2
+  %ele2 = load i16, ptr addrspace(3) %gep2, align 2
+  %gep3 = getelementptr i16, ptr addrspace(3) %inptr0, i32 3
+  %ele3 = load i16, ptr addrspace(3) %gep3, align 1
+  %gep4 = getelementptr i16, ptr addrspace(3) %inptr0, i32 4
+  %ele4 = load i16, ptr addrspace(3) %gep4, align 8
+  %gep5 = getelementptr i16, ptr addrspace(3) %inptr0, i32 5
+  %ele5 = load i16, ptr addrspace(3) %gep5, align 1
+  %gep6 = getelementptr i16, ptr addrspace(3) %inptr0, i32 6
+  %ele6 = load i16, ptr addrspace(3) %gep6, align 2
+  %gep7 = getelementptr i16, ptr addrspace(3) %inptr0, i32 7
+  %ele7 = load i16, ptr addrspace(3) %gep7, align 1
+  %gep8 = getelementptr i16, ptr addrspace(3) %inptr0, i32 8
+  %ele8 = load i16, ptr addrspace(3) %gep8, align 8
+  %gep9 = getelementptr i16, ptr addrspace(3) %inptr0, i32 9
+  %ele9 = load i16, ptr addrspace(3) %gep9, align 1
+  %gep10 = getelementptr i16, ptr addrspace(3) %inptr0, i32 10
+  %ele10 = load i16, ptr addrspace(3) %gep10, align 2
+  %gep11 = getelementptr i16, ptr addrspace(3) %inptr0, i32 11
+  %ele11 = load i16, ptr addrspace(3) %gep11, align 1
+  %gep12 = getelementptr i16, ptr addrspace(3) %inptr0, i32 12
+  %ele12 = load i16, ptr addrspace(3) %gep12, align 8
+  %gep13 = getelementptr i16, ptr addrspace(3) %inptr0, i32 13
+  %ele13 = load i16, ptr addrspace(3) %gep13, align 1
+  %gep14 = getelementptr i16, ptr addrspace(3) %inptr0, i32 14
+  %ele14 = load i16, ptr addrspace(3) %gep14, align 2
+  %gep15 = getelementptr i16, ptr addrspace(3) %inptr0, i32 15
+  %ele15 = load i16, ptr addrspace(3) %gep15, align 1
+  br label %do.body
+
+do.body: ; Self-loop: %phiN takes %eleN on entry and %othereleN on the back edge.
+  %phi0 = phi i16 [ %ele0, %entry ], [ %otherele0, %do.body ]
+  %phi1 = phi i16 [ %ele1, %entry ], [ %otherele1, %do.body ]
+  %phi2 = phi i16 [ %ele2, %entry ], [ %otherele2, %do.body ]
+  %phi3 = phi i16 [ %ele3, %entry ], [ %otherele3, %do.body ]
+  %phi4 = phi i16 [ %ele4, %entry ], [ %otherele4, %do.body ]
+  %phi5 = phi i16 [ %ele5, %entry ], [ %otherele5, %do.body ]
+  %phi6 = phi i16 [ %ele6, %entry ], [ %otherele6, %do.body ]
+  %phi7 = phi i16 [ %ele7, %entry ], [ %otherele7, %do.body ]
+  %phi8 = phi i16 [ %ele8, %entry ], [ %otherele8, %do.body ]
+  %phi9 = phi i16 [ %ele9, %entry ], [ %otherele9, %do.body ]
+  %phi10 = phi i16 [ %ele10, %entry ], [ %otherele10, %do.body ]
+  %phi11 = phi i16 [ %ele11, %entry ], [ %otherele11, %do.body ]
+  %phi12 = phi i16 [ %ele12, %entry ], [ %otherele12, %do.body ]
+  %phi13 = phi i16 [ %ele13, %entry ], [ %otherele13, %do.body ]
+  %phi14 = phi i16 [ %ele14, %entry ], [ %otherele14, %do.body ]
+  %phi15 = phi i16 [ %ele15, %entry ], [ %otherele15, %do.body ]
+
+  %otherele0 = load i16, ptr addrspace(3) %inptr0, align 8 ; Reload the same 16 addresses used in entry on every iteration.
+  %otherele1 = load i16, ptr addrspace(3) %gep1, align 1
+  %otherele2 = load i16, ptr addrspace(3) %gep2, align 2
+  %otherele3 = load i16, ptr addrspace(3) %gep3, align 1
+  %otherele4 = load i16, ptr addrspace(3) %gep4, align 8
+  %otherele5 = load i16, ptr addrspace(3) %gep5, align 1
+  %otherele6 = load i16, ptr addrspace(3) %gep6, align 2
+  %otherele7 = load i16, ptr addrspace(3) %gep7, align 1
+  %otherele8 = load i16, ptr addrspace(3) %gep8, align 8
+  %otherele9 = load i16, ptr addrspace(3) %gep9, align 1
+  %otherele10 = load i16, ptr addrspace(3) %gep10, align 2
+  %otherele11 = load i16, ptr addrspace(3) %gep11, align 1
+  %otherele12 = load i16, ptr addrspace(3) %gep12, align 8
+  %otherele13 = load i16, ptr addrspace(3) %gep13, align 1
+  %otherele14 = load i16, ptr addrspace(3) %gep14, align 2
+  %otherele15 = load i16, ptr addrspace(3) %gep15, align 1
+  %cmp = icmp eq i32 %flag, 0 ; Exit condition depends only on the %flag argument, never on the loaded data.
+  br i1 %cmp, label %exit, label %do.body
+
+exit: ; Three insertelement chains each rebuild a <16 x i16> lane by lane from the scalars above.
+  %vec00 = insertelement <16 x i16> poison, i16 %otherele0, i64 0 ; Chain %vec0*: the in-loop loads %otherele0..15.
+  %vec01 = insertelement <16 x i16> %vec00, i16 %otherele1, i64 1
+  %vec02 = insertelement <16 x i16> %vec01, i16 %otherele2, i64 2
+  %vec03 = insertelement <16 x i16> %vec02, i16 %otherele3, i64 3
+  %vec04 = insertelement <16 x i16> %vec03, i16 %otherele4, i64 4
+  %vec05 = insertelement <16 x i16> %vec04, i16 %otherele5, i64 5
+  %vec06 = insertelement <16 x i16> %vec05, i16 %otherele6, i64 6
+  %vec07 = insertelement <16 x i16> %vec06, i16 %otherele7, i64 7
+  %vec08 = insertelement <16 x i16> %vec07, i16 %otherele8, i64 8
+  %vec09 = insertelement <16 x i16> %vec08, i16 %otherele9, i64 9
+  %vec010 = insertelement <16 x i16> %vec09, i16 %otherele10, i64 10
+  %vec011 = insertelement <16 x i16> %vec010, i16 %otherele11, i64 11
+  %vec012 = insertelement <16 x i16> %vec011, i16 %otherele12, i64 12
+  %vec013 = insertelement <16 x i16> %vec012, i16 %otherele13, i64 13
+  %vec014 = insertelement <16 x i16> %vec013, i16 %otherele14, i64 14
+  %vec015 = insertelement <16 x i16> %vec014, i16 %otherele15, i64 15
+
+  %vec10 = insertelement <16 x i16> poison, i16 %ele0, i64 0 ; Chain %vec1*: the entry-block loads %ele0..15.
+  %vec11 = insertelement <16 x i16> %vec10, i16 %ele1, i64 1
+  %vec12 = insertelement <16 x i16> %vec11, i16 %ele2, i64 2
+  %vec13 = insertelement <16 x i16> %vec12, i16 %ele3, i64 3
+  %vec14 = insertelement <16 x i16> %vec13, i16 %ele4, i64 4
+  %vec15 = insertelement <16 x i16> %vec14, i16 %ele5, i64 5
+  %vec16 = insertelement <16 x i16> %vec15, i16 %ele6, i64 6
+  %vec17 = insertelement <16 x i16> %vec16, i16 %ele7, i64 7
+  %vec18 = insertelement <16 x i16> %vec17, i16 %ele8, i64 8
+  %vec19 = insertelement <16 x i16> %vec18, i16 %ele9, i64 9
+  %vec110 = insertelement <16 x i16> %vec19, i16 %ele10, i64 10
+  %vec111 = insertelement <16 x i16> %vec110, i16 %ele11, i64 11
+  %vec112 = insertelement <16 x i16> %vec111, i16 %ele12, i64 12
+  %vec113 = insertelement <16 x i16> %vec112, i16 %ele13, i64 13
+  %vec114 = insertelement <16 x i16> %vec113, i16 %ele14, i64 14
+  %vec115 = insertelement <16 x i16> %vec114, i16 %ele15, i64 15
+
+  %vec20 = insertelement <16 x i16> poison, i16 %phi0, i64 0 ; Chain %vec2*: the loop phis %phi0..15.
+  %vec21 = insertelement <16 x i16> %vec20, i16 %phi1, i64 1
+  %vec22 = insertelement <16 x i16> %vec21, i16 %phi2, i64 2
+  %vec23 = insertelement <16 x i16> %vec22, i16 %phi3, i64 3
+  %vec24 = insertelement <16 x i16> %vec23, i16 %phi4, i64 4
+  %vec25 = insertelement <16 x i16> %vec24, i16 %phi5, i64 5
+  %vec26 = insertelement <16 x i16> %vec25, i16 %phi6, i64 6
+  %vec27 = insertelement <16 x i16> %vec26, i16 %phi7, i64 7
+  %vec28 = insertelement <16 x i16> %vec27, i16 %phi8, i64 8
+  %vec29 = insertelement <16 x i16> %vec28, i16 %phi9, i64 9
+  %vec210 = insertelement <16 x i16> %vec29, i16 %phi10, i64 10
+  %vec211 = insertelement <16 x i16> %vec210, i16 %phi11, i64 11
+  %vec212 = insertelement <16 x i16> %vec211, i16 %phi12, i64 12
+  %vec213 = insertelement <16 x i16> %vec212, i16 %phi13, i64 13
+  %vec214 = insertelement <16 x i16> %vec213, i16 %phi14, i64 14
+  %vec215 = insertelement <16 x i16> %vec214, i16 %phi15, i64 15
+
+  store <16 x i16> %vec115, ptr %out ; entry-load vector -> %out
+  store <16 x i16> %vec015, ptr %out1 ; in-loop-load vector -> %out1
+  store <16 x i16> %vec215, ptr %out2 ; phi vector -> %out2
+
+  ret void
+}