@@ -12,26 +12,79 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
1212; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
1313; CHECK: [[FOR_BODY_PREHEADER]]:
1414; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
15+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[RAND_BLOCK_LENGTH]], 4
16+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER23:.*]], label %[[VECTOR_PH:.*]]
17+ ; CHECK: [[VECTOR_PH]]:
18+ ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
19+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Y]], i64 0
20+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
21+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <2 x double> poison, double [[Z]], i64 0
22+ ; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT19]], <2 x double> poison, <2 x i32> zeroinitializer
23+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
24+ ; CHECK: [[VECTOR_BODY]]:
25+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
26+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
27+ ; CHECK-NEXT: [[VEC_PHI15:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
28+ ; CHECK-NEXT: [[VEC_PHI16:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
29+ ; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
30+ ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
31+ ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX]], i64 8
32+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 4
33+ ; CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <2 x float>, ptr [[TMP23]], align 4
34+ ; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
35+ ; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[WIDE_LOAD18]] to <2 x double>
36+ ; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[BROADCAST_SPLAT]], [[TMP2]]
37+ ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[BROADCAST_SPLAT]], [[TMP3]]
38+ ; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <2 x double> [[TMP4]], [[BROADCAST_SPLAT20]]
39+ ; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <2 x double> [[TMP5]], [[BROADCAST_SPLAT20]]
40+ ; CHECK-NEXT: [[TMP8:%.*]] = fcmp fast ogt <2 x double> [[TMP6]], zeroinitializer
41+ ; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt <2 x double> [[TMP7]], zeroinitializer
42+ ; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[TMP6]], [[TMP6]]
43+ ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[TMP7]], [[TMP7]]
44+ ; CHECK-NEXT: [[TMP12:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP6]], <2 x double> <double -0.000000e+00, double -0.000000e+00>)
45+ ; CHECK-NEXT: [[TMP13:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP7]], <2 x double> <double -0.000000e+00, double -0.000000e+00>)
46+ ; CHECK-NEXT: [[TMP14]] = fadd fast <2 x double> [[TMP12]], [[VEC_PHI16]]
47+ ; CHECK-NEXT: [[TMP15]] = fadd fast <2 x double> [[TMP13]], [[VEC_PHI17]]
48+ ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP8]], <2 x double> [[TMP10]], <2 x double> <double -0.000000e+00, double -0.000000e+00>
49+ ; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP9]], <2 x double> [[TMP11]], <2 x double> <double -0.000000e+00, double -0.000000e+00>
50+ ; CHECK-NEXT: [[TMP18]] = fadd fast <2 x double> [[TMP16]], [[VEC_PHI]]
51+ ; CHECK-NEXT: [[TMP19]] = fadd fast <2 x double> [[TMP17]], [[VEC_PHI15]]
52+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV]], 4
53+ ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
54+ ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
55+ ; CHECK: [[MIDDLE_BLOCK]]:
56+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x double> [[TMP19]], [[TMP18]]
57+ ; CHECK-NEXT: [[TMP21:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX]])
58+ ; CHECK-NEXT: [[BIN_RDX21:%.*]] = fadd fast <2 x double> [[TMP15]], [[TMP14]]
59+ ; CHECK-NEXT: [[TMP22:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX21]])
60+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
61+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER23]]
62+ ; CHECK: [[FOR_BODY_PREHEADER23]]:
63+ ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
64+ ; CHECK-NEXT: [[V1_012_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP21]], %[[MIDDLE_BLOCK]] ]
65+ ; CHECK-NEXT: [[V0_011_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP22]], %[[MIDDLE_BLOCK]] ]
1566; CHECK-NEXT: br label %[[FOR_BODY:.*]]
1667; CHECK: [[FOR_BODY]]:
17- ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
18- ; CHECK-NEXT: [[V1_011:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V1_1:%.*]], %[[FOR_BODY]] ]
19- ; CHECK-NEXT: [[V0_010:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V0_1:%.*]], %[[FOR_BODY]] ]
20- ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
21- ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
68+ ; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER23]] ]
69+ ; CHECK-NEXT: [[V1_012:%.*]] = phi double [ [[V1_2:%.*]], %[[FOR_BODY]] ], [ [[V1_012_PH]], %[[FOR_BODY_PREHEADER23]] ]
70+ ; CHECK-NEXT: [[V0_011:%.*]] = phi double [ [[V0_2:%.*]], %[[FOR_BODY]] ], [ [[V0_011_PH]], %[[FOR_BODY_PREHEADER23]] ]
71+ ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]]
72+ ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
2273; CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
2374; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[Y]], [[CONV]]
2475; CHECK-NEXT: [[SUB:%.*]] = fsub fast double [[MUL]], [[Z]]
2576; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00
26- ; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[SUB]], [[V0_010]]
2777; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]]
28- ; CHECK-NEXT: [[ADD4:%.*]] = fadd fast double [[MUL3]], [[V1_011]]
29- ; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], double [[ADD]], double [[V0_010]]
30- ; CHECK-NEXT: [[V1_1]] = select i1 [[CMP1]], double [[ADD4]], double [[V1_011]]
31- ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
78+ ; CHECK-NEXT: [[ADD8:%.*]] = tail call fast double @llvm.maxnum.f64(double [[SUB]], double -0.000000e+00)
79+ ; CHECK-NEXT: [[V0_2]] = fadd fast double [[ADD8]], [[V0_011]]
80+ ; CHECK-NEXT: [[ADD4:%.*]] = select i1 [[CMP1]], double [[MUL3]], double -0.000000e+00
81+ ; CHECK-NEXT: [[V1_2]] = fadd fast double [[ADD4]], [[V1_012]]
82+ ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
3283; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
33- ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
84+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
3485; CHECK: [[FOR_END_LOOPEXIT]]:
86+ ; CHECK-NEXT: [[V0_1:%.*]] = phi double [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ [[V0_2]], %[[FOR_BODY]] ]
87+ ; CHECK-NEXT: [[V1_1:%.*]] = phi double [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[V1_2]], %[[FOR_BODY]] ]
3588; CHECK-NEXT: [[TMP1:%.*]] = fadd fast double [[V1_1]], [[V0_1]]
3689; CHECK-NEXT: br label %[[FOR_END]]
3790; CHECK: [[FOR_END]]:
@@ -292,3 +345,9 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
292345declare void @resample (i32 noundef, ptr noundef)
293346declare double @llvm.exp2.f64 (double )
294347declare void @llvm.lifetime.end.p0 (i64 immarg, ptr nocapture )
348+ ;.
349+ ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
350+ ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
351+ ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
352+ ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
353+ ;.
0 commit comments