@@ -926,53 +926,17 @@ define void @same_op8_splat(ptr noalias noundef %a, ptr noundef %b, ptr noundef
926926; CHECK-NEXT: [[ENTRY:.*]]:
927927; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[C]], align 4
928928; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
929- ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
930- ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
931- ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
932- ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <4 x i32> zeroinitializer
929+ ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <16 x i32> zeroinitializer
933930; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
934931; CHECK: [[VECTOR_BODY]]:
935932; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
936933; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
937934; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]]
938935; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP5]], align 4
939- ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
940- ; CHECK-NEXT: [[STRIDED_VEC12:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
941- ; CHECK-NEXT: [[STRIDED_VEC13:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 2, i32 10>
942- ; CHECK-NEXT: [[STRIDED_VEC14:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 3, i32 11>
943- ; CHECK-NEXT: [[STRIDED_VEC15:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 4, i32 12>
944- ; CHECK-NEXT: [[STRIDED_VEC16:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 5, i32 13>
945- ; CHECK-NEXT: [[STRIDED_VEC17:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 6, i32 14>
946- ; CHECK-NEXT: [[STRIDED_VEC18:%.*]] = shufflevector <16 x float> [[WIDE_VEC]], <16 x float> poison, <2 x i32> <i32 7, i32 15>
947936; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]]
948937; CHECK-NEXT: [[WIDE_VEC19:%.*]] = load <16 x float>, ptr [[TMP6]], align 4
949- ; CHECK-NEXT: [[STRIDED_VEC20:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 0, i32 8>
950- ; CHECK-NEXT: [[STRIDED_VEC21:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 1, i32 9>
951- ; CHECK-NEXT: [[STRIDED_VEC22:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 2, i32 10>
952- ; CHECK-NEXT: [[STRIDED_VEC23:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 3, i32 11>
953- ; CHECK-NEXT: [[STRIDED_VEC24:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 4, i32 12>
954- ; CHECK-NEXT: [[STRIDED_VEC25:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 5, i32 13>
955- ; CHECK-NEXT: [[STRIDED_VEC26:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 6, i32 14>
956- ; CHECK-NEXT: [[STRIDED_VEC27:%.*]] = shufflevector <16 x float> [[WIDE_VEC19]], <16 x float> poison, <2 x i32> <i32 7, i32 15>
957- ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[STRIDED_VEC20]], <2 x float> [[STRIDED_VEC21]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
958- ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[STRIDED_VEC]], <2 x float> [[STRIDED_VEC12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
959- ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[TMP8]], [[TMP1]]
960- ; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[TMP7]], [[TMP9]]
961- ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[STRIDED_VEC22]], <2 x float> [[STRIDED_VEC23]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
962- ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[STRIDED_VEC13]], <2 x float> [[STRIDED_VEC14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
963- ; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <4 x float> [[TMP12]], [[TMP2]]
964- ; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP13]]
965- ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x float> [[STRIDED_VEC24]], <2 x float> [[STRIDED_VEC25]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
966- ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[STRIDED_VEC15]], <2 x float> [[STRIDED_VEC16]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
967- ; CHECK-NEXT: [[TMP17:%.*]] = fmul fast <4 x float> [[TMP16]], [[TMP3]]
968- ; CHECK-NEXT: [[TMP18:%.*]] = fadd fast <4 x float> [[TMP15]], [[TMP17]]
969- ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x float> [[STRIDED_VEC26]], <2 x float> [[STRIDED_VEC27]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
970- ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[STRIDED_VEC17]], <2 x float> [[STRIDED_VEC18]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
971- ; CHECK-NEXT: [[TMP21:%.*]] = fmul fast <4 x float> [[TMP20]], [[TMP4]]
972- ; CHECK-NEXT: [[TMP22:%.*]] = fadd fast <4 x float> [[TMP19]], [[TMP21]]
973- ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
974- ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x float> [[TMP18]], <4 x float> [[TMP22]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
975- ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x float> [[TMP23]], <8 x float> [[TMP24]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
938+ ; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[TMP1]]
939+ ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x float> [[WIDE_VEC19]], [[TMP4]]
976940; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 4
977941; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
978942; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 144
0 commit comments