44target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
55target triple = "x86_64-unknown-linux-gnu"
66
7+ ; FIXME: !llvm.access.group should be preserved, loop should be vectorized.
78; End-to-end test for https://github.com/llvm/llvm-project/issues/115595.
89define void @test (i32 noundef %nface , i32 noundef %ncell , ptr noalias noundef %face_cell , ptr noalias noundef %x , ptr noalias noundef %y ) #0 {
910; CHECK-LABEL: define void @test(
@@ -14,16 +15,16 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
1415; CHECK: [[FOR_BODY_PREHEADER]]:
1516; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[NFACE]] to i64
1617; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[TMP0]]
17- ; CHECK-NEXT: [[MIN_ITERS_CHECK :%.*]] = icmp ult i32 [[NFACE]], 4
18- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK ]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]]
18+ ; CHECK-NEXT: [[TMP1 :%.*]] = icmp ult i32 [[NFACE]], 4
19+ ; CHECK-NEXT: br i1 [[TMP1 ]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]]
1920; CHECK: [[VECTOR_PH]]:
20- ; CHECK-NEXT: [[N_VEC :%.*]] = and i64 [[TMP0]], 2147483644
21+ ; CHECK-NEXT: [[UNROLL_ITER :%.*]] = and i64 [[TMP0]], 2147483644
2122; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2223; CHECK: [[VECTOR_BODY]]:
23- ; CHECK-NEXT: [[INDEX :%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
24- ; CHECK-NEXT: [[TMP1 :%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDEX ]]
25- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1 ]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
26- ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDEX ]]
24+ ; CHECK-NEXT: [[INDVARS_IV_EPIL :%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
25+ ; CHECK-NEXT: [[TMP10 :%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL ]]
26+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10 ]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
27+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL ]]
2728; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
2829; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
2930; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[Y]], <4 x i64> [[TMP3]]
@@ -34,14 +35,14 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
3435; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x double> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER13]]
3536; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x double> [[WIDE_MASKED_GATHER13]], <4 x double> [[WIDE_MASKED_GATHER]]
3637; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
37- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX ]], 4
38- ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC ]]
38+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL ]], 4
39+ ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER ]]
3940; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
4041; CHECK: [[MIDDLE_BLOCK]]:
41- ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC ]], [[TMP0]]
42+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UNROLL_ITER ]], [[TMP0]]
4243; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER14]]
4344; CHECK: [[FOR_BODY_PREHEADER14]]:
44- ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC ]], %[[MIDDLE_BLOCK]] ]
45+ ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[UNROLL_ITER ]], %[[MIDDLE_BLOCK]] ]
4546; CHECK-NEXT: br label %[[FOR_BODY:.*]]
4647; CHECK: [[FOR_COND_CLEANUP]]:
4748; CHECK-NEXT: ret void
0 commit comments