@@ -15,6 +15,13 @@ define void @ld_and_neg1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
1515; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1616; CHECK-NEXT: [[TMP0:%.*]] = and i64 [[INDEX]], -1
1717; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
18+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
19+ ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
20+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
21+ ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP3]], align 8
22+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
23+ ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
24+ ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1825; CHECK: middle.block:
1926; CHECK-NEXT: br label [[EXIT:%.*]]
2027; CHECK: scalar.ph:
@@ -54,6 +61,10 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
5461; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
5562; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
5663; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
64+ ; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
65+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
66+ ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
67+ ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
5768; CHECK: middle.block:
5869; CHECK-NEXT: br label [[EXIT:%.*]]
5970; CHECK: scalar.ph:
@@ -97,6 +108,11 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
97108; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP6]], i32 1
98109; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[TMP8]], splat (i64 42)
99110; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
111+ ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8
112+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
113+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
114+ ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
115+ ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
100116; CHECK: middle.block:
101117; CHECK-NEXT: br label [[EXIT:%.*]]
102118; CHECK: scalar.ph:
@@ -351,6 +367,11 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
351367; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP6]], i32 1
352368; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[TMP8]], splat (i64 42)
353369; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
370+ ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP10]], align 8
371+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
372+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
373+ ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
374+ ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
354375; CHECK: middle.block:
355376; CHECK-NEXT: br label [[SCALAR_PH]]
356377; CHECK: scalar.ph:
0 commit comments