diff --git a/bench/abc/optimized/giaSif.ll b/bench/abc/optimized/giaSif.ll index d72d00db7a1..7ff972a5048 100644 --- a/bench/abc/optimized/giaSif.ll +++ b/bench/abc/optimized/giaSif.ll @@ -5412,8 +5412,8 @@ define void @Gia_ManSifPrintTimes(ptr noundef readonly captures(none) %0, ptr no %21 = zext i1 %20 to i32 %22 = add i32 %18, -65536 %23 = add i32 %22, %21 - %24 = tail call range(i32 -2147483648, 536870913) i32 @llvm.smin.i32(i32 %23, i32 255) - %25 = tail call noundef i32 @llvm.smax.i32(i32 %24, i32 -255) + %24 = tail call i32 @llvm.smax.i32(i32 %23, i32 -255) + %25 = tail call i32 @llvm.smin.i32(i32 %24, i32 255) %26 = icmp sgt i32 %23, -1 %27 = sub nsw i32 0, %25 %.sink = select i1 %26, i32 %25, i32 %27 diff --git a/bench/abc/optimized/giaUtil.ll b/bench/abc/optimized/giaUtil.ll index f3dc18e1c2b..25a423c7d0d 100644 --- a/bench/abc/optimized/giaUtil.ll +++ b/bench/abc/optimized/giaUtil.ll @@ -19322,8 +19322,8 @@ Vec_WrdStartTruthTables.exit: ; preds = %..loopexit28_crit_e br i1 %58, label %.lr.ph.preheader.i.us, label %Abc_TtFill.exit .lr.ph.preheader.i.us: ; preds = %.lr.ph95, %Abc_TtPrintBinary1.exit78.us - %.val54.val94.us = phi i32 [ %.val54.val.us, %Abc_TtPrintBinary1.exit78.us ], [ %.val54.val91, %.lr.ph95 ] - %.192.us = phi i32 [ %85, %Abc_TtPrintBinary1.exit78.us ], [ 0, %.lr.ph95 ] + %.val54.val94.us = phi i32 [ %.val54.val.us, %Abc_TtPrintBinary1.exit77.us ], [ %.val54.val91, %.lr.ph94 ] + %.192.us = phi i32 [ %85, %Abc_TtPrintBinary1.exit77.us ], [ 0, %.lr.ph94 ] tail call void @llvm.memset.p0.i64(ptr align 8 %50, i8 -1, i64 %60, i1 false), !tbaa !205 %62 = icmp sgt i32 %.val54.val94.us, 0 br i1 %62, label %.lr.ph89.us, label %._crit_edge.split.us.us @@ -19349,11 +19349,11 @@ Vec_WrdStartTruthTables.exit: ; preds = %..loopexit28_crit_e br label %.preheader.us.i73.us .preheader.us.i73.us: ; preds = %._crit_edge.us.i77.us, %.preheader.us.preheader.i71.us - %.014.us.i74.us = phi ptr [ %83, %._crit_edge.us.i77.us ], [ %50, %.preheader.us.preheader.i71.us ] + %.014.us.i74.us = phi ptr [ %83, %._crit_edge.us.i76.us ], [ %50, %.preheader.us.preheader.i71.us ] br label %73 73: ; preds = %73, %.preheader.us.i73.us - %.01213.us.i75.us = phi i32 [ 0, %.preheader.us.i73.us ], [ %82, %73 ] + %.01213.us.i75.us = phi i32 [ 0, %.preheader.us.i72.us ], [ %82, %73 ] %74 = lshr i32 %.01213.us.i75.us, 5 %75 = zext nneg i32 %74 to i64 %76 = getelementptr inbounds nuw i32, ptr %.014.us.i74.us, i64 %75 @@ -19387,7 +19387,7 @@ Abc_TtPrintBinary1.exit78.us: ; preds = %._crit_edge.us.i77. br label %89 89: ; preds = %Abc_TtAndCompl.exit.us.us, %.lr.ph89.us - %indvars.iv103 = phi i64 [ %indvars.iv.next104, %Abc_TtAndCompl.exit.us.us ], [ 0, %.lr.ph89.us ] + %indvars.iv103 = phi i64 [ %indvars.iv.next104, %Abc_TtAndCompl.exit.us.us ], [ 0, %.lr.ph88.us ] %90 = mul nuw nsw i64 %indvars.iv103, %59 %91 = getelementptr inbounds nuw i64, ptr %.val63.us, i64 %90 %92 = trunc nuw nsw i64 %indvars.iv103 to i32 @@ -19482,7 +19482,7 @@ Abc_TtPrintBinary1.exit: ; preds = %._crit_edge.us.i, % br i1 %131, label %.lr.ph, label %.preheader, !llvm.loop !268 Abc_TtFill.exit: ; preds = %.lr.ph95, %Abc_TtPrintBinary1.exit78 - %.192 = phi i32 [ %154, %Abc_TtPrintBinary1.exit78 ], [ 0, %.lr.ph95 ] + %.192 = phi i32 [ %154, %Abc_TtPrintBinary1.exit77 ], [ 0, %.lr.ph94 ] %.val56 = load ptr, ptr %8, align 8, !tbaa !57 %132 = getelementptr i8, ptr %.val56, i64 4 %.val56.val = load i32, ptr %132, align 4, !tbaa !58 @@ -19503,11 +19503,11 @@ Abc_TtFill.exit: ; preds = %.lr.ph95, %Abc_TtPr br label %.preheader.us.i73 .preheader.us.i73: ; preds = %._crit_edge.us.i77, %.preheader.us.preheader.i71 - %.014.us.i74 = phi ptr [ %152, %._crit_edge.us.i77 ], [ %50, %.preheader.us.preheader.i71 ] + %.014.us.i74 = phi ptr [ %152, %._crit_edge.us.i76 ], [ %50, %.preheader.us.preheader.i71 ] br label %142 142: ; preds = %142, %.preheader.us.i73 - %.01213.us.i75 = phi i32 [ 0, %.preheader.us.i73 ], [ %151, %142 ] + %.01213.us.i75 = phi i32 [ 0, %.preheader.us.i72 ], [ %151, %142 ] %143 = lshr i32 %.01213.us.i75, 5 %144 = zext nneg i32 %143 to i64 %145 = getelementptr inbounds nuw i32, ptr %.014.us.i74, i64 %144 diff --git a/bench/abc/optimized/rsbDec6.ll b/bench/abc/optimized/rsbDec6.ll index 8553333d1be..032cf0a12e3 100644 --- a/bench/abc/optimized/rsbDec6.ll +++ b/bench/abc/optimized/rsbDec6.ll @@ -3687,8 +3687,8 @@ define void @Rsb_DecPrintFunc(ptr noundef readonly captures(none) %0, i32 nounde br i1 %or.cond.i, label %Abc_TtPrintBinary.exit51.critedge, label %.preheader.us.preheader.i .preheader.us.preheader.i: ; preds = %6 - %57 = tail call noundef range(i32 -2147483648, 65) i32 @llvm.smin.i32(i32 %13, i32 64) - %smax.i = tail call i32 @llvm.smax.i32(i32 %57, i32 1) + %57 = tail call i32 @llvm.smax.i32(i32 %13, i32 1) + %smax.i = tail call i32 @llvm.umin.i32(i32 %57, i32 64) br label %.preheader.us.i .preheader.us.i: ; preds = %._crit_edge.us.i, %.preheader.us.preheader.i @@ -3696,7 +3696,7 @@ define void @Rsb_DecPrintFunc(ptr noundef readonly captures(none) %0, i32 nounde br label %58 58: ; preds = %58, %.preheader.us.i - %.01112.us.i = phi i32 [ 0, %.preheader.us.i ], [ %67, %58 ] + %.01112.us.i = phi i32 [ 0, %.preheader.us.i ], [ %67, %59 ] %59 = lshr i32 %.01112.us.i, 5 %60 = zext nneg i32 %59 to i64 %61 = getelementptr inbounds nuw i32, ptr %.013.us.i, i64 %60 @@ -3720,11 +3720,11 @@ Abc_TtPrintBinary.exit: ; preds = %._crit_edge.us.i br label %.preheader.us.i45 .preheader.us.i45: ; preds = %._crit_edge.us.i49, %Abc_TtPrintBinary.exit - %.013.us.i46 = phi ptr [ %81, %._crit_edge.us.i49 ], [ %7, %Abc_TtPrintBinary.exit ] + %.013.us.i46 = phi ptr [ %81, %._crit_edge.us.i48 ], [ %7, %Abc_TtPrintBinary.exit ] br label %71 71: ; preds = %71, %.preheader.us.i45 - %.01112.us.i47 = phi i32 [ 0, %.preheader.us.i45 ], [ %80, %71 ] + %.01112.us.i47 = phi i32 [ 0, %.preheader.us.i44 ], [ %80, %72 ] %72 = lshr i32 %.01112.us.i47, 5 %73 = zext nneg i32 %72 to i64 %74 = getelementptr inbounds nuw i32, ptr %.013.us.i46, i64 %73 @@ -3754,16 +3754,16 @@ Abc_TtPrintBinary.exit51: ; preds = %._crit_edge.us.i49, br i1 %or.cond.i, label %Abc_TtPrintBinary.exit62, label %.preheader.us.preheader.i54 .preheader.us.preheader.i54: ; preds = %Abc_TtPrintBinary.exit51 - %86 = call noundef range(i32 -2147483648, 65) i32 @llvm.smin.i32(i32 %13, i32 64) - %smax.i55 = call i32 @llvm.smax.i32(i32 %86, i32 1) + %86 = call i32 @llvm.smax.i32(i32 %13, i32 1) + %smax.i55 = call i32 @llvm.umin.i32(i32 %86, i32 64) br label %.preheader.us.i56 .preheader.us.i56: ; preds = %._crit_edge.us.i60, %.preheader.us.preheader.i54 - %.013.us.i57 = phi ptr [ %97, %._crit_edge.us.i60 ], [ %8, %.preheader.us.preheader.i54 ] + %.013.us.i57 = phi ptr [ %97, %._crit_edge.us.i58 ], [ %8, %.preheader.us.preheader.i53 ] br label %87 87: ; preds = %87, %.preheader.us.i56 - %.01112.us.i58 = phi i32 [ 0, %.preheader.us.i56 ], [ %96, %87 ] + %.01112.us.i58 = phi i32 [ 0, %.preheader.us.i54 ], [ %96, %89 ] %88 = lshr i32 %.01112.us.i58, 5 %89 = zext nneg i32 %88 to i64 %90 = getelementptr inbounds nuw i32, ptr %.013.us.i57, i64 %89 @@ -3803,11 +3803,11 @@ Abc_TtPrintBinary.exit62: ; preds = %._crit_edge.us.i60, br label %.preheader.us.i67 .preheader.us.i67: ; preds = %._crit_edge.us.i71, %.preheader.us.preheader.i65 - %.013.us.i68 = phi ptr [ %119, %._crit_edge.us.i71 ], [ %2, %.preheader.us.preheader.i65 ] + %.013.us.i68 = phi ptr [ %119, %._crit_edge.us.i68 ], [ %2, %.preheader.us.preheader.i63 ] br label %109 109: ; preds = %109, %.preheader.us.i67 - %.01112.us.i69 = phi i32 [ 0, %.preheader.us.i67 ], [ %118, %109 ] + %.01112.us.i69 = phi i32 [ 0, %.preheader.us.i64 ], [ %118, %111 ] %110 = lshr i32 %.01112.us.i69, 5 %111 = zext nneg i32 %110 to i64 %112 = getelementptr inbounds nuw i32, ptr %.013.us.i68, i64 %111 @@ -3857,8 +3857,8 @@ Abc_TtPrintBinary.exit73: ; preds = %._crit_edge.us.i71, br i1 %129, label %.lr.ph78, label %._crit_edge79 .lr.ph78: ; preds = %._crit_edge, %.lr.ph78 - %indvars.iv81 = phi i64 [ %indvars.iv.next82, %.lr.ph78 ], [ 0, %._crit_edge ] - %130 = phi ptr [ %135, %.lr.ph78 ], [ %127, %._crit_edge ] + %indvars.iv81 = phi i64 [ %indvars.iv.next82, %.lr.ph75 ], [ 0, %._crit_edge ] + %130 = phi ptr [ %135, %.lr.ph75 ], [ %127, %._crit_edge ] %131 = getelementptr i8, ptr %130, i64 8 %.val32 = load ptr, ptr %131, align 8, !tbaa !25 %132 = getelementptr inbounds nuw i32, ptr %.val32, i64 %indvars.iv81 diff --git a/bench/box2d/optimized/imgui_draw.ll b/bench/box2d/optimized/imgui_draw.ll index 1e49b7f4a9b..adeecfbbc28 100644 --- a/bench/box2d/optimized/imgui_draw.ll +++ b/bench/box2d/optimized/imgui_draw.ll @@ -1378,8 +1378,8 @@ define dso_local void @_ZN20ImDrawListSharedData29SetCircleTessellationMaxErrorE %23 = add nsw i32 %22, 1 %24 = sdiv i32 %23, 2 %25 = shl nsw i32 %24, 1 - %26 = tail call i32 @llvm.smin.i32(i32 range(i32 -2147483646, -2147483648) %25, i32 512) - %27 = tail call i32 @llvm.smax.i32(i32 %26, i32 4) + %26 = tail call i32 @llvm.smax.i32(i32 %25, i32 4) + %27 = tail call i32 @llvm.umin.i32(i32 %26, i32 512) %28 = trunc i32 %27 to i8 br label %29 @@ -3336,8 +3336,8 @@ define dso_local noundef range(i32 0, 513) i32 @_ZNK10ImDrawList27_CalcCircleAut %24 = add nsw i32 %23, 1 %25 = sdiv i32 %24, 2 %26 = shl nsw i32 %25, 1 - %27 = tail call i32 @llvm.smin.i32(i32 range(i32 -2147483646, -2147483648) %26, i32 512) - %28 = tail call i32 @llvm.smax.i32(i32 %27, i32 4) + %27 = tail call i32 @llvm.smax.i32(i32 %26, i32 4) + %28 = tail call i32 @llvm.umin.i32(i32 %27, i32 512) br label %29 29: ; preds = %13, %7 @@ -5306,8 +5306,8 @@ _ZN8ImVectorI6ImVec2E9push_backERKS0_.exit: ; preds = %._ZN8ImVectorI6ImVe %63 = add nsw i32 %62, 1 %64 = sdiv i32 %63, 2 %65 = shl nsw i32 %64, 1 - %66 = tail call i32 @llvm.smin.i32(i32 range(i32 -2147483646, -2147483648) %65, i32 512) - %67 = tail call i32 @llvm.smax.i32(i32 %66, i32 4) + %66 = tail call i32 @llvm.smax.i32(i32 %65, i32 4) + %67 = tail call i32 @llvm.umin.i32(i32 %66, i32 512) br label %_ZNK10ImDrawList27_CalcCircleAutoSegmentCountEf.exit _ZNK10ImDrawList27_CalcCircleAutoSegmentCountEf.exit: ; preds = %46, %52 @@ -6124,8 +6124,8 @@ _ZN8ImVectorI6ImVec2E9push_backERKS0_.exit81: ; preds = %._ZN8ImVectorI6ImVe %217 = add nsw i32 %216, 1 %218 = sdiv i32 %217, 2 %219 = shl nsw i32 %218, 1 - %220 = tail call i32 @llvm.smin.i32(i32 range(i32 -2147483646, -2147483648) %219, i32 512) - %221 = tail call i32 @llvm.smax.i32(i32 %220, i32 4) + %220 = tail call i32 @llvm.smax.i32(i32 %219, i32 4) + %221 = tail call i32 @llvm.umin.i32(i32 %220, i32 512) br label %_ZNK10ImDrawList27_CalcCircleAutoSegmentCountEf.exit _ZNK10ImDrawList27_CalcCircleAutoSegmentCountEf.exit: ; preds = %200, %206 @@ -6185,8 +6185,8 @@ define dso_local void @_ZN10ImDrawList19PathEllipticalArcToERK6ImVec2S2_fffi(ptr %36 = add nsw i32 %35, 1 %37 = sdiv i32 %36, 2 %38 = shl nsw i32 %37, 1 - %39 = tail call i32 @llvm.smin.i32(i32 range(i32 -2147483646, -2147483648) %38, i32 512) - %40 = tail call i32 @llvm.smax.i32(i32 %39, i32 4) + %39 = tail call i32 @llvm.smax.i32(i32 %38, i32 4) + %40 = tail call i32 @llvm.umin.i32(i32 %39, i32 512) br label %_ZNK10ImDrawList27_CalcCircleAutoSegmentCountEf.exit _ZNK10ImDrawList27_CalcCircleAutoSegmentCountEf.exit: ; preds = %25, %19, %7 @@ -9004,8 +9004,8 @@ define dso_local void @_ZN10ImDrawList10AddEllipseERK6ImVec2S2_jfif(ptr noundef %38 = add nsw i32 %37, 1 %39 = sdiv i32 %38, 2 %40 = shl nsw i32 %39, 1 - %41 = tail call i32 @llvm.smin.i32(i32 range(i32 -2147483646, -2147483648) %40, i32 512) - %42 = tail call i32 @llvm.smax.i32(i32 %41, i32 4) + %41 = tail call i32 @llvm.smax.i32(i32 %40, i32 4) + %42 = tail call i32 @llvm.umin.i32(i32 %41, i32 512) br label %_ZNK10ImDrawList27_CalcCircleAutoSegmentCountEf.exit _ZNK10ImDrawList27_CalcCircleAutoSegmentCountEf.exit: ; preds = %27, %21, %9 @@ -9072,8 +9072,8 @@ define dso_local void @_ZN10ImDrawList16AddEllipseFilledERK6ImVec2S2_jfi(ptr nou %37 = add nsw i32 %36, 1 %38 = sdiv i32 %37, 2 %39 = shl nsw i32 %38, 1 - %40 = tail call i32 @llvm.smin.i32(i32 range(i32 -2147483646, -2147483648) %39, i32 512) - %41 = tail call i32 @llvm.smax.i32(i32 %40, i32 4) + %40 = tail call i32 @llvm.smax.i32(i32 %39, i32 4) + %41 = tail call i32 @llvm.umin.i32(i32 %40, i32 512) br label %_ZNK10ImDrawList27_CalcCircleAutoSegmentCountEf.exit _ZNK10ImDrawList27_CalcCircleAutoSegmentCountEf.exit: ; preds = %26, %20, %8 diff --git a/bench/bullet3/optimized/btMultiSphereShape.ll b/bench/bullet3/optimized/btMultiSphereShape.ll index c54379b69f4..1b405fef106 100644 --- a/bench/bullet3/optimized/btMultiSphereShape.ll +++ b/bench/bullet3/optimized/btMultiSphereShape.ll @@ -377,8 +377,8 @@ define dso_local { <2 x float>, <2 x float> } @_ZNK18btMultiSphereShape37localGe %.026100 = phi i32 [ 0, %.lr.ph105 ], [ %82, %81 ] %.sroa.4.099 = phi <2 x float> [ zeroinitializer, %.lr.ph105 ], [ %.sroa.4.1, %81 ] %.sroa.089.098 = phi <2 x float> [ zeroinitializer, %.lr.ph105 ], [ %.sroa.089.1, %81 ] - %25 = tail call i32 @llvm.smin.i32(i32 %indvars.iv, i32 128) - %26 = tail call i32 @llvm.smax.i32(i32 %25, i32 1) + %25 = tail call i32 @llvm.smax.i32(i32 %indvars.iv, i32 1) + %26 = tail call i32 @llvm.umin.i32(i32 %25, i32 128) %smax = zext nneg i32 %26 to i64 call void @llvm.lifetime.start.p0(i64 2048, ptr nonnull %3) #14 %27 = sub nsw i32 %15, %.026100 @@ -538,8 +538,8 @@ define dso_local void @_ZNK18btMultiSphereShape49batchedUnitVectorGetSupportingV %.03081 = phi ptr [ %19, %.lr.ph84 ], [ %.131.lcssa, %90 ] %.03280 = phi i32 [ 0, %.lr.ph84 ], [ %91, %90 ] %.03379 = phi ptr [ %18, %.lr.ph84 ], [ %.134.lcssa, %90 ] - %24 = tail call i32 @llvm.smin.i32(i32 %indvars.iv, i32 128) - %25 = tail call i32 @llvm.smax.i32(i32 %24, i32 1) + %24 = tail call i32 @llvm.smax.i32(i32 %indvars.iv, i32 1) + %25 = tail call i32 @llvm.umin.i32(i32 %24, i32 128) %smax = zext nneg i32 %25 to i64 call void @llvm.lifetime.start.p0(i64 2048, ptr nonnull %5) #14 %26 = sub nsw i32 %15, %.03280 @@ -1038,6 +1038,9 @@ declare i32 @llvm.smin.i32(i32, i32) #13 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #13 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #13 + ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.sqrt.f32(float) #13 diff --git a/bench/ceres/optimized/gradient_checker.ll b/bench/ceres/optimized/gradient_checker.ll index 3f3bc9327cd..3a150444eed 100644 --- a/bench/ceres/optimized/gradient_checker.ll +++ b/bench/ceres/optimized/gradient_checker.ll @@ -4846,55 +4846,55 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, 17: ; preds = %_ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit %18 = add nsw i64 %13, -128 %19 = sdiv i64 %18, 64 - %.sroa.speculated157 = tail call i64 @llvm.smin.i64(i64 %19, i64 320) - %.sroa.speculated163 = tail call i64 @llvm.smax.i64(i64 %.sroa.speculated157, i64 8) + %.sroa.speculated157 = tail call i64 @llvm.smax.i64(i64 %19, i64 8) + %.sroa.speculated163 = tail call i64 @llvm.umin.i64(i64 %.sroa.speculated157, i64 320) %20 = load i64, ptr %0, align 8, !tbaa !119 %21 = icmp slt i64 %.sroa.speculated163, %20 - br i1 %21, label %22, label %24 + br i1 %21, label %22, label %26 22: ; preds = %17 %23 = and i64 %.sroa.speculated163, 504 store i64 %23, ptr %0, align 8, !tbaa !119 - br label %24 + br label %26 -24: ; preds = %22, %17 - %25 = phi i64 [ %23, %22 ], [ %20, %17 ] - %26 = sub nsw i64 %14, %13 - %27 = shl i64 %25, 5 - %28 = udiv i64 %26, %27 - %29 = load i64, ptr %2, align 8, !tbaa !119 - %30 = add nsw i64 %3, -1 - %31 = add i64 %30, %29 - %32 = sdiv i64 %31, %3 - %.not114 = icmp sgt i64 %28, %32 - br i1 %.not114, label %36, label %33 - -33: ; preds = %24 - %34 = srem i64 %28, 4 - %35 = sub nsw i64 %28, %34 - br label %40 - -36: ; preds = %24 - %37 = add nsw i64 %32, 3 - %38 = srem i64 %37, 4 - %39 = sub nsw i64 %37, %38 - %.sroa.speculated153 = tail call i64 @llvm.smin.i64(i64 %39, i64 %29) - br label %40 - -40: ; preds = %36, %33 - %storemerge184 = phi i64 [ %35, %33 ], [ %.sroa.speculated153, %36 ] +26: ; preds = %24, %17 + %27 = phi i64 [ %25, %24 ], [ %22, %17 ] + %28 = sub nsw i64 %14, %13 + %29 = shl i64 %27, 5 + %30 = udiv i64 %28, %29 + %31 = load i64, ptr %2, align 8, !tbaa !119 + %32 = add nsw i64 %3, -1 + %33 = add i64 %32, %31 + %34 = sdiv i64 %33, %3 + %.not114 = icmp sgt i64 %30, %34 + br i1 %.not114, label %38, label %35 + +35: ; preds = %26 + %36 = srem i64 %30, 4 + %37 = sub nsw i64 %30, %36 + br label %42 + +38: ; preds = %26 + %39 = add nsw i64 %34, 3 + %40 = srem i64 %39, 4 + %41 = sub nsw i64 %39, %40 + %.sroa.speculated153 = tail call i64 @llvm.smin.i64(i64 %41, i64 %31) + br label %42 + +24: ; preds = %38, %35 + %25 = phi i64 [ %37, %35 ], [ %.sroa.speculated153, %38 ] store i64 %storemerge184, ptr %2, align 8, !tbaa !119 %41 = icmp sgt i64 %15, %14 br i1 %41, label %42, label %.critedge116 -42: ; preds = %40 +42: ; preds = %42 %43 = sub nsw i64 %15, %14 %44 = load i64, ptr %0, align 8, !tbaa !119 %45 = shl i64 %3, 3 %46 = mul i64 %45, %44 %47 = udiv i64 %43, %46 %48 = load i64, ptr %1, align 8, !tbaa !119 - %49 = add i64 %30, %48 + %49 = add i64 %32, %48 %50 = sdiv i64 %49, %3 %51 = icmp slt i64 %47, %50 %52 = icmp sgt i64 %47, 3 @@ -4921,147 +4921,147 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, %63 = load i64, ptr %0, align 8, !tbaa !119 %.sroa.speculated145 = tail call i64 @llvm.smax.i64(i64 %63, i64 %62) %64 = icmp slt i64 %.sroa.speculated145, 48 - br i1 %64, label %.critedge116, label %65 - -65: ; preds = %59 - %66 = add nsw i64 %13, -128 - %67 = sdiv i64 %66, 64 - %68 = and i64 %67, -8 - %.sroa.speculated140 = tail call i64 @llvm.smax.i64(i64 %68, i64 1) - %69 = icmp sgt i64 %63, %.sroa.speculated140 - br i1 %69, label %70, label %84 - -70: ; preds = %65 - %71 = urem i64 %63, %.sroa.speculated140 - %72 = udiv i64 %63, %.sroa.speculated140 - %73 = icmp eq i64 %71, 0 - br i1 %73, label %82, label %74 - -74: ; preds = %70 - %75 = xor i64 %71, -1 - %76 = add nsw i64 %.sroa.speculated140, %75 - %77 = shl i64 %72, 3 - %78 = add i64 %77, 8 - %79 = sdiv i64 %76, %78 - %80 = shl nsw i64 %79, 3 - %81 = sub nsw i64 %.sroa.speculated140, %80 - br label %82 - -82: ; preds = %70, %74 - %83 = phi i64 [ %81, %74 ], [ %.sroa.speculated140, %70 ] - store i64 %83, ptr %0, align 8, !tbaa !119 - %.pre = load i64, ptr %1, align 8, !tbaa !119 + br i1 %64, label %.critedge116, label %67 + +82: ; preds = %61 + %68 = add nsw i64 %13, -128 + %69 = sdiv i64 %68, 64 + %70 = and i64 %69, -8 + %.sroa.speculated140 = tail call i64 @llvm.smax.i64(i64 %70, i64 1) + %71 = icmp sgt i64 %65, %.sroa.speculated140 + br i1 %71, label %72, label %86 + +72: ; preds = %67 + %73 = urem i64 %65, %.sroa.speculated140 + %74 = udiv i64 %65, %.sroa.speculated140 + %75 = icmp eq i64 %73, 0 + br i1 %75, label %84, label %76 + +76: ; preds = %72 + %77 = xor i64 %73, -1 + %78 = add nsw i64 %.sroa.speculated140, %77 + %79 = shl i64 %74, 3 + %80 = add i64 %79, 8 + %81 = sdiv i64 %78, %80 + %82 = shl nsw i64 %81, 3 + %83 = sub nsw i64 %.sroa.speculated140, %82 br label %84 -84: ; preds = %82, %65 - %85 = phi i64 [ %83, %82 ], [ %63, %65 ] - %86 = phi i64 [ %.pre, %82 ], [ %60, %65 ] - %87 = shl i64 %86, 3 - %88 = mul i64 %87, %85 - %89 = sub nsw i64 %66, %88 - %90 = shl nsw i64 %85, 5 - %.not = icmp slt i64 %89, %90 - br i1 %.not, label %94, label %91 - -91: ; preds = %84 - %92 = shl i64 %85, 3 - %93 = udiv i64 %89, %92 - br label %97 - -94: ; preds = %84 - %95 = shl i64 %.sroa.speculated140, 5 - %96 = udiv i64 4718592, %95 - br label %97 - -97: ; preds = %94, %91 - %storemerge = phi i64 [ %96, %94 ], [ %93, %91 ] - %98 = shl i64 %85, 4 - %99 = udiv i64 1572864, %98 - %.sroa.speculated135 = tail call i64 @llvm.smin.i64(i64 %storemerge, i64 %99) - %100 = and i64 %.sroa.speculated135, -4 - %101 = load i64, ptr %2, align 8, !tbaa !119 - %102 = icmp sgt i64 %101, %100 - br i1 %102, label %103, label %116 - -103: ; preds = %97 - %104 = srem i64 %101, %100 - %105 = sdiv i64 %101, %100 - %106 = icmp eq i64 %104, 0 - br i1 %106, label %114, label %107 - -107: ; preds = %103 - %108 = sub nsw i64 %100, %104 - %109 = shl i64 %105, 2 - %110 = add i64 %109, 4 - %111 = sdiv i64 %108, %110 - %112 = shl nsw i64 %111, 2 - %113 = sub nsw i64 %100, %112 - br label %114 - -114: ; preds = %103, %107 - %115 = phi i64 [ %113, %107 ], [ %100, %103 ] - store i64 %115, ptr %2, align 8, !tbaa !119 - br label %.critedge116 - -116: ; preds = %97 - %117 = icmp eq i64 %63, %85 - br i1 %117, label %118, label %.critedge116 - -118: ; preds = %116 - %119 = shl i64 %63, 3 - %120 = mul i64 %119, %101 - %121 = icmp slt i64 %120, 1025 - br i1 %121, label %126, label %122 +84: ; preds = %72, %76 + %85 = phi i64 [ %83, %76 ], [ %.sroa.speculated140, %72 ] + store i64 %85, ptr %0, align 8, !tbaa !119 + %.pre = load i64, ptr %1, align 8, !tbaa !119 + br label %86 -122: ; preds = %118 - %123 = icmp ne i64 %15, 0 - %124 = icmp samesign ult i64 %120, 32769 - %or.cond4 = and i1 %123, %124 - br i1 %or.cond4, label %125, label %126 +122: ; preds = %84, %67 + %87 = phi i64 [ %85, %84 ], [ %65, %67 ] + %88 = phi i64 [ %.pre, %84 ], [ %60, %67 ] + %89 = shl i64 %88, 3 + %90 = mul i64 %89, %87 + %91 = sub nsw i64 %68, %90 + %92 = shl nsw i64 %87, 5 + %.not = icmp slt i64 %91, %92 + br i1 %.not, label %96, label %93 + +93: ; preds = %86 + %94 = shl i64 %87, 3 + %95 = udiv i64 %91, %94 + br label %99 125: ; preds = %122 - %.sroa.speculated128 = tail call i64 @llvm.smin.i64(i64 %86, i64 576) - br label %126 + %97 = shl i64 %.sroa.speculated140, 5 + %98 = udiv i64 4718592, %97 + br label %99 + +99: ; preds = %96, %93 + %storemerge = phi i64 [ %98, %96 ], [ %95, %93 ] + %100 = shl i64 %87, 4 + %101 = udiv i64 1572864, %100 + %.sroa.speculated135 = tail call i64 @llvm.smin.i64(i64 %storemerge, i64 %101) + %102 = and i64 %.sroa.speculated135, -4 + %103 = load i64, ptr %2, align 8, !tbaa !119 + %104 = icmp sgt i64 %103, %102 + br i1 %104, label %105, label %118 + +105: ; preds = %99 + %106 = srem i64 %103, %102 + %107 = sdiv i64 %103, %102 + %108 = icmp eq i64 %106, 0 + br i1 %108, label %116, label %109 + +109: ; preds = %105 + %110 = sub nsw i64 %102, %106 + %111 = shl i64 %107, 2 + %112 = add i64 %111, 4 + %113 = sdiv i64 %110, %112 + %114 = shl nsw i64 %113, 2 + %115 = sub nsw i64 %102, %114 + br label %116 + +116: ; preds = %105, %109 + %117 = phi i64 [ %115, %109 ], [ %102, %105 ] + store i64 %117, ptr %2, align 8, !tbaa !119 + br label %.critedge116 -126: ; preds = %118, %122, %125 - %.0182 = phi i64 [ %.sroa.speculated128, %125 ], [ %86, %122 ], [ %86, %118 ] - %.093 = phi i64 [ %14, %125 ], [ 1572864, %122 ], [ %13, %118 ] - %127 = mul i64 %63, 24 - %128 = udiv i64 %.093, %127 - %.sroa.speculated = tail call i64 @llvm.smin.i64(i64 %.0182, i64 %128) - %129 = icmp sgt i64 %.sroa.speculated, 4 - br i1 %129, label %130, label %132 - -130: ; preds = %126 - %131 = and i64 %.sroa.speculated, 9223372036854775804 - br label %134 - -132: ; preds = %126 - %133 = icmp eq i64 %.sroa.speculated, 0 - br i1 %133, label %.critedge116, label %134 - -134: ; preds = %132, %130 - %.0 = phi i64 [ %131, %130 ], [ %.sroa.speculated, %132 ] - %135 = srem i64 %86, %.0 - %136 = sdiv i64 %86, %.0 - %137 = icmp eq i64 %135, 0 - br i1 %137, label %.critedge, label %138 - -138: ; preds = %134 - %139 = sub nsw i64 %.0, %135 - %140 = shl i64 %136, 2 - %141 = add i64 %140, 4 - %142 = sdiv i64 %139, %141 - %143 = shl nsw i64 %142, 2 - %144 = sub nsw i64 %.0, %143 +126: ; preds = %99 + %119 = icmp eq i64 %65, %87 + br i1 %119, label %120, label %.critedge116 + +120:; preds = %126 + %121 = shl i64 %65, 3 + %122 = mul i64 %121, %103 + %123 = icmp slt i64 %122, 1025 + br i1 %123, label %128, label %124 + +124:; preds = %120 + %125 = icmp ne i64 %15, 0 + %126 = icmp samesign ult i64 %122, 32769 + %or.cond4 = and i1 %125, %126 + br i1 %or.cond4, label %127, label %128 + +127: ; preds = %124 + %.sroa.speculated128 = tail call i64 @llvm.smin.i64(i64 %88, i64 576) + br label %128 + +128:; preds = %120, %124, %127 + %.0182 = phi i64 [ %.sroa.speculated128, %127 ], [ %88, %124 ], [ %88, %120 ] + %.093 = phi i64 [ %14, %127 ], [ 1572864, %124 ], [ %13, %120 ] + %129 = mul i64 %65, 24 + %130 = udiv i64 %.093, %129 + %.sroa.speculated = tail call i64 @llvm.smin.i64(i64 %.0182, i64 %130) + %131 = icmp sgt i64 %.sroa.speculated, 4 + br i1 %131, label %132, label %134 + +132:; preds = %128 + %133 = and i64 %.sroa.speculated, 9223372036854775804 + br label %136 + +134: ; preds = %128 + %135 = icmp eq i64 %.sroa.speculated, 0 + br i1 %135, label %.critedge116, label %136 + +136: ; preds = %134, %132 + %.0 = phi i64 [ %133, %132 ], [ %.sroa.speculated, %134 ] + %137 = srem i64 %88, %.0 + %138 = sdiv i64 %88, %.0 + %139 = icmp eq i64 %137, 0 + br i1 %139, label %.critedge, label %140 + +140: ; preds = %136 + %141 = sub nsw i64 %.0, %137 + %142 = shl i64 %138, 2 + %143 = add i64 %142, 4 + %144 = sdiv i64 %141, %143 + %145 = shl nsw i64 %144, 2 + %146 = sub nsw i64 %.0, %145 br label %.critedge -.critedge: ; preds = %134, %138 - %145 = phi i64 [ %144, %138 ], [ %.0, %134 ] +.critedge: ; preds = %136, %140 + %145 = phi i64 [ %146, %140 ], [ %.0, %136 ] store i64 %145, ptr %1, align 8, !tbaa !119 br label %.critedge116 -.critedge116: ; preds = %.critedge, %116, %114, %132, %53, %55, %40, %59 +.critedge116: ; preds = %.critedge, %118, %116, %134, %53, %55, %42, %59 ret void } diff --git a/bench/ceres/optimized/manifold.ll b/bench/ceres/optimized/manifold.ll index 83b1973cbe6..98d959548ec 100644 --- a/bench/ceres/optimized/manifold.ll +++ b/bench/ceres/optimized/manifold.ll @@ -3718,55 +3718,55 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, 17: ; preds = %_ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit %18 = add nsw i64 %13, -128 %19 = sdiv i64 %18, 64 - %.sroa.speculated157 = tail call i64 @llvm.smin.i64(i64 %19, i64 320) - %.sroa.speculated163 = tail call i64 @llvm.smax.i64(i64 %.sroa.speculated157, i64 8) + %.sroa.speculated157 = tail call i64 @llvm.smax.i64(i64 %19, i64 8) + %.sroa.speculated163 = tail call i64 @llvm.umin.i64(i64 %.sroa.speculated157, i64 320) %20 = load i64, ptr %0, align 8, !tbaa !59 %21 = icmp slt i64 %.sroa.speculated163, %20 - br i1 %21, label %22, label %24 + br i1 %21, label %22, label %26 22: ; preds = %17 %23 = and i64 %.sroa.speculated163, 504 store i64 %23, ptr %0, align 8, !tbaa !59 - br label %24 - -24: ; preds = %22, %17 - %25 = phi i64 [ %23, %22 ], [ %20, %17 ] - %26 = sub nsw i64 %14, %13 - %27 = shl i64 %25, 5 - %28 = udiv i64 %26, %27 - %29 = load i64, ptr %2, align 8, !tbaa !59 - %30 = add nsw i64 %3, -1 - %31 = add i64 %30, %29 - %32 = sdiv i64 %31, %3 - %.not114 = icmp sgt i64 %28, %32 - br i1 %.not114, label %36, label %33 - -33: ; preds = %24 - %34 = srem i64 %28, 4 - %35 = sub nsw i64 %28, %34 - br label %40 - -36: ; preds = %24 - %37 = add nsw i64 %32, 3 - %38 = srem i64 %37, 4 - %39 = sub nsw i64 %37, %38 - %.sroa.speculated153 = tail call i64 @llvm.smin.i64(i64 %39, i64 %29) - br label %40 + br label %26 -40: ; preds = %36, %33 - %storemerge184 = phi i64 [ %35, %33 ], [ %.sroa.speculated153, %36 ] +36: ; preds = %24, %17 + %27 = phi i64 [ %23, %24 ], [ %22, %17 ] + %28 = sub nsw i64 %14, %13 + %29 = shl i64 %27, 5 + %30 = udiv i64 %28, %29 + %31 = load i64, ptr %2, align 8, !tbaa !59 + %32 = add nsw i64 %3, -1 + %33 = add i64 %32, %31 + %34 = sdiv i64 %33, %3 + %.not114 = icmp sgt i64 %30, %34 + br i1 %.not114, label %38, label %35 + +35: ; preds = %26 + %36 = srem i64 %30, 4 + %37 = sub nsw i64 %30, %36 + br label %42 + +40: ; preds = %36 + %39 = add nsw i64 %34, 3 + %40 = srem i64 %39, 4 + %41 = sub nsw i64 %39, %40 + %.sroa.speculated153 = tail call i64 @llvm.smin.i64(i64 %41, i64 %31) + br label %42 + +42: ; preds = %38, %35 + %storemerge184 = phi i64 [ %37, %35 ], [ %.sroa.speculated153, %38 ] store i64 %storemerge184, ptr %2, align 8, !tbaa !59 %41 = icmp sgt i64 %15, %14 br i1 %41, label %42, label %.critedge116 -42: ; preds = %40 +42: ; preds = %42 %43 = sub nsw i64 %15, %14 %44 = load i64, ptr %0, align 8, !tbaa !59 %45 = shl i64 %3, 3 %46 = mul i64 %45, %44 %47 = udiv i64 %43, %46 %48 = load i64, ptr %1, align 8, !tbaa !59 - %49 = add i64 %30, %48 + %49 = add i64 %32, %48 %50 = sdiv i64 %49, %3 %51 = icmp slt i64 %47, %50 %52 = icmp sgt i64 %47, 3 @@ -3801,13 +3801,13 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, %68 = and i64 %67, -8 %.sroa.speculated140 = tail call i64 @llvm.smax.i64(i64 %68, i64 1) %69 = icmp sgt i64 %63, %.sroa.speculated140 - br i1 %69, label %70, label %84 + br i1 %69, label %70, label %86 70: ; preds = %65 %71 = urem i64 %63, %.sroa.speculated140 %72 = udiv i64 %63, %.sroa.speculated140 %73 = icmp eq i64 %71, 0 - br i1 %73, label %82, label %74 + br i1 %73, label %84, label %74 74: ; preds = %70 %75 = xor i64 %71, -1 @@ -3817,71 +3817,71 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, %79 = sdiv i64 %76, %78 %80 = shl nsw i64 %79, 3 %81 = sub nsw i64 %.sroa.speculated140, %80 - br label %82 - -82: ; preds = %70, %74 - %83 = phi i64 [ %81, %74 ], [ %.sroa.speculated140, %70 ] - store i64 %83, ptr %0, align 8, !tbaa !59 - %.pre = load i64, ptr %1, align 8, !tbaa !59 br label %84 -84: ; preds = %82, %65 - %85 = phi i64 [ %83, %82 ], [ %63, %65 ] - %86 = phi i64 [ %.pre, %82 ], [ %60, %65 ] - %87 = shl i64 %86, 3 - %88 = mul i64 %87, %85 - %89 = sub nsw i64 %66, %88 - %90 = shl nsw i64 %85, 5 - %.not = icmp slt i64 %89, %90 - br i1 %.not, label %94, label %91 - -91: ; preds = %84 - %92 = shl i64 %85, 3 - %93 = udiv i64 %89, %92 - br label %97 - -94: ; preds = %84 - %95 = shl i64 %.sroa.speculated140, 5 - %96 = udiv i64 4718592, %95 - br label %97 - -97: ; preds = %94, %91 - %storemerge = phi i64 [ %96, %94 ], [ %93, %91 ] - %98 = shl i64 %85, 4 - %99 = udiv i64 1572864, %98 - %.sroa.speculated135 = tail call i64 @llvm.smin.i64(i64 %storemerge, i64 %99) - %100 = and i64 %.sroa.speculated135, -4 - %101 = load i64, ptr %2, align 8, !tbaa !59 - %102 = icmp sgt i64 %101, %100 - br i1 %102, label %103, label %116 - -103: ; preds = %97 - %104 = srem i64 %101, %100 - %105 = sdiv i64 %101, %100 - %106 = icmp eq i64 %104, 0 - br i1 %106, label %114, label %107 - -107: ; preds = %103 - %108 = sub nsw i64 %100, %104 - %109 = shl i64 %105, 2 - %110 = add i64 %109, 4 - %111 = sdiv i64 %108, %110 - %112 = shl nsw i64 %111, 2 - %113 = sub nsw i64 %100, %112 - br label %114 - -114: ; preds = %103, %107 - %115 = phi i64 [ %113, %107 ], [ %100, %103 ] - store i64 %115, ptr %2, align 8, !tbaa !59 +84: ; preds = %72, %76 + %85 = phi i64 [ %83, %76 ], [ %.sroa.speculated138, %72 ] + store i64 %85, ptr %0, align 8, !tbaa !59 + %.pre = load i64, ptr %1, align 8, !tbaa !59 + br label %86 + +86:; preds = %84, %65 + %87 = phi i64 [ %85, %84 ], [ %65, %67 ] + %88 = phi i64 [ %.pre, %84 ], [ %62, %67 ] + %89 = shl i64 %88, 3 + %90 = mul i64 %89, %87 + %91 = sub nsw i64 %68, %90 + %92 = shl nsw i64 %87, 5 + %.not = icmp slt i64 %91, %92 + br i1 %.not, label %96, label %93 + +93:; preds = %86 + %94 = shl i64 %87, 3 + %95 = udiv i64 %91, %94 + br label %99 + +96:; preds = %86 + %98 = shl i64 %.sroa.speculated138, 5 + %99 = udiv i64 4718592, %98 + br label %99 + +99:; preds = %96, %93 + %storemerge = phi i64 [ %98, %96 ], [ %95, %93 ] + %100 = shl i64 %87, 4 + %101 = udiv i64 1572864, %100 + %.sroa.speculated135 = tail call i64 @llvm.smin.i64(i64 %storemerge, i64 %101) + %102 = and i64 %.sroa.speculated135, -4 + %103 = load i64, ptr %2, align 8, !tbaa !59 + %106 = icmp sgt i64 %103, %102 + br i1 %106, label %105, label %118 + +107: ; preds = %99 + %106 = srem i64 %103, %102 + %107 = sdiv i64 %103, %102 + %108 = icmp eq i64 %106, 0 + br i1 %108, label %116, label %109 + +109:; preds = %105 + %110 = sub nsw i64 %102, %106 + %111 = shl i64 %107, 2 + %112 = add i64 %111, 4 + %113 = sdiv i64 %110, %112 + %114 = shl nsw i64 %113, 2 + %115 = sub nsw i64 %102, %114 + br label %116 + +116: ; preds = %105, %109 + %117 = phi i64 [ %115, %109 ], [ %102, %105 ] + store i64 %117, ptr %2, align 8, !tbaa !59 br label %.critedge116 -116: ; preds = %97 - %117 = icmp eq i64 %63, %85 +116: ; preds = %99 + %117 = icmp eq i64 %63, %87 br i1 %117, label %118, label %.critedge116 118: ; preds = %116 %119 = shl i64 %63, 3 - %120 = mul i64 %119, %101 + %120 = mul i64 %119, %103 %121 = icmp slt i64 %120, 1025 br i1 %121, label %126, label %122 @@ -3892,12 +3892,12 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br i1 %or.cond4, label %125, label %126 125: ; preds = %122 - %.sroa.speculated128 = tail call i64 @llvm.smin.i64(i64 %86, i64 576) + %.sroa.speculated128 = tail call i64 @llvm.smin.i64(i64 %88, i64 576) br label %126 126: ; preds = %118, %122, %125 - %.0182 = phi i64 [ %.sroa.speculated128, %125 ], [ %86, %122 ], [ %86, %118 ] - %.093 = phi i64 [ %14, %125 ], [ 1572864, %122 ], [ %13, %118 ] + %.0182 = phi i64 [ %.sroa.speculated128, %127 ], [ %88, %124 ], [ %88, %120 ] + %.093 = phi i64 [ %14, %127 ], [ 1572864, %124 ], [ %13, %120 ] %127 = mul i64 %63, 24 %128 = udiv i64 %.093, %127 %.sroa.speculated = tail call i64 @llvm.smin.i64(i64 %.0182, i64 %128) @@ -3913,9 +3913,9 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br i1 %133, label %.critedge116, label %134 134: ; preds = %132, %130 - %.0 = phi i64 [ %131, %130 ], [ %.sroa.speculated, %132 ] - %135 = srem i64 %86, %.0 - %136 = sdiv i64 %86, %.0 + %.0 = phi i64 [ %131, %132 ], [ %.sroa.speculated, %134 ] + %135 = srem i64 %88, %.0 + %136 = sdiv i64 %88, %.0 %137 = icmp eq i64 %135, 0 br i1 %137, label %.critedge, label %138 @@ -3929,11 +3929,11 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br label %.critedge .critedge: ; preds = %134, %138 - %145 = phi i64 [ %144, %138 ], [ %.0, %134 ] + %145 = phi i64 [ %144, %140 ], [ %.0, %136 ] store i64 %145, ptr %1, align 8, !tbaa !59 br label %.critedge116 -.critedge116: ; preds = %.critedge, %116, %114, %132, %53, %55, %40, %59 +.critedge116: ; preds = %.critedge, %116, %116, %132, %53, %55, %42, %59 ret void } @@ -7157,6 +7157,9 @@ declare void @llvm.experimental.noalias.scope.decl(metadata) #34 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.smax.i64(i64, i64) #33 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.umin.i64(i64, i64) #33 + ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare double @llvm.sqrt.f64(double) #33 diff --git a/bench/ceres/optimized/schur_complement_solver.ll b/bench/ceres/optimized/schur_complement_solver.ll index cb611263ed8..c804b279cd2 100644 --- a/bench/ceres/optimized/schur_complement_solver.ll +++ b/bench/ceres/optimized/schur_complement_solver.ll @@ -8194,55 +8194,55 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, 17: ; preds = %_ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit %18 = add nsw i64 %13, -128 %19 = sdiv i64 %18, 64 - %.sroa.speculated157 = tail call i64 @llvm.smin.i64(i64 %19, i64 320) - %.sroa.speculated163 = tail call i64 @llvm.smax.i64(i64 %.sroa.speculated157, i64 8) + %.sroa.speculated157 = tail call i64 @llvm.smax.i64(i64 %19, i64 8) + %.sroa.speculated163 = tail call i64 @llvm.umin.i64(i64 %.sroa.speculated157, i64 320) %20 = load i64, ptr %0, align 8, !tbaa !382 %21 = icmp slt i64 %.sroa.speculated163, %20 - br i1 %21, label %22, label %24 + br i1 %21, label %22, label %26 22: ; preds = %17 %23 = and i64 %.sroa.speculated163, 504 store i64 %23, ptr %0, align 8, !tbaa !382 - br label %24 - -24: ; preds = %22, %17 - %25 = phi i64 [ %23, %22 ], [ %20, %17 ] - %26 = sub nsw i64 %14, %13 - %27 = shl i64 %25, 5 - %28 = udiv i64 %26, %27 - %29 = load i64, ptr %2, align 8, !tbaa !382 - %30 = add nsw i64 %3, -1 - %31 = add i64 %30, %29 - %32 = sdiv i64 %31, %3 - %.not114 = icmp sgt i64 %28, %32 - br i1 %.not114, label %36, label %33 - -33: ; preds = %24 - %34 = srem i64 %28, 4 - %35 = sub nsw i64 %28, %34 - br label %40 - -36: ; preds = %24 - %37 = add nsw i64 %32, 3 - %38 = srem i64 %37, 4 - %39 = sub nsw i64 %37, %38 - %.sroa.speculated153 = tail call i64 @llvm.smin.i64(i64 %39, i64 %29) - br label %40 + br label %26 -40: ; preds = %36, %33 - %storemerge184 = phi i64 [ %35, %33 ], [ %.sroa.speculated153, %36 ] +40: ; preds = %24, %17 + %storemerge184 = phi i64 [ %25, %24 ], [ %22, %17 ] + %28 = sub nsw i64 %14, %13 + %29 = shl i64 %27, 5 + %30 = udiv i64 %28, %29 + %31 = load i64, ptr %2, align 8, !tbaa !382 + %32 = add nsw i64 %3, -1 + %33 = add i64 %32, %31 + %34 = sdiv i64 %33, %3 + %.not114 = icmp sgt i64 %30, %34 + br i1 %.not114, label %38, label %35 + +35: ; preds = %26 + %36 = srem i64 %30, 4 + %37 = sub nsw i64 %30, %36 + br label %42 + +38: ; preds = %26 + %39 = add nsw i64 %34, 3 + %40 = srem i64 %39, 4 + %41 = sub nsw i64 %39, %40 + %.sroa.speculated153 = tail call i64 @llvm.smin.i64(i64 %41, i64 %31) + br label %42 + +42: ; preds = %38, %35 + %storemerge184 = phi i64 [ %37, %35 ], [ %.sroa.speculated153, %38 ] store i64 %storemerge184, ptr %2, align 8, !tbaa !382 %41 = icmp sgt i64 %15, %14 br i1 %41, label %42, label %.critedge116 -42: ; preds = %40 +42: ; preds = %42 %43 = sub nsw i64 %15, %14 %44 = load i64, ptr %0, align 8, !tbaa !382 %45 = shl i64 %3, 3 %46 = mul i64 %45, %44 %47 = udiv i64 %43, %46 %48 = load i64, ptr %1, align 8, !tbaa !382 - %49 = add i64 %30, %48 + %49 = add i64 %32, %48 %50 = sdiv i64 %49, %3 %51 = icmp slt i64 %47, %50 %52 = icmp sgt i64 %47, 3 @@ -8277,87 +8277,87 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, %68 = and i64 %67, -8 %.sroa.speculated140 = tail call i64 @llvm.smax.i64(i64 %68, i64 1) %69 = icmp sgt i64 %63, %.sroa.speculated140 - br i1 %69, label %70, label %84 - -70: ; preds = %65 - %71 = urem i64 %63, %.sroa.speculated140 - %72 = udiv i64 %63, %.sroa.speculated140 - %73 = icmp eq i64 %71, 0 - br i1 %73, label %82, label %74 - -74: ; preds = %70 - %75 = xor i64 %71, -1 - %76 = add nsw i64 %.sroa.speculated140, %75 - %77 = shl i64 %72, 3 - %78 = add i64 %77, 8 - %79 = sdiv i64 %76, %78 - %80 = shl nsw i64 %79, 3 - %81 = sub nsw i64 %.sroa.speculated140, %80 - br label %82 - -82: ; preds = %70, %74 - %83 = phi i64 [ %81, %74 ], [ %.sroa.speculated140, %70 ] - store i64 %83, ptr %0, align 8, !tbaa !382 - %.pre = load i64, ptr %1, align 8, !tbaa !382 + br i1 %69, label %72, label %86 + +82: ; preds = %67 + %73 = urem i64 %65, %.sroa.speculated140 + %74 = udiv i64 %63, %.sroa.speculated138 + %75 = icmp eq i64 %73, 0 + br i1 %75, label %84, label %76 + +76: ; preds = %72 + %77 = xor i64 %73, -1 + %78 = add nsw i64 %.sroa.speculated140, %77 + %79 = shl i64 %74, 3 + %80 = add i64 %79, 8 + %81 = sdiv i64 %78, %80 + %82 = shl nsw i64 %81, 3 + %83 = sub nsw i64 %.sroa.speculated140, %82 br label %84 -84: ; preds = %82, %65 - %85 = phi i64 [ %83, %82 ], [ %63, %65 ] - %86 = phi i64 [ %.pre, %82 ], [ %60, %65 ] - %87 = shl i64 %86, 3 - %88 = mul i64 %87, %85 - %89 = sub nsw i64 %66, %88 - %90 = shl nsw i64 %85, 5 - %.not = icmp slt i64 %89, %90 - br i1 %.not, label %94, label %91 - -91: ; preds = %84 - %92 = shl i64 %85, 3 - %93 = udiv i64 %89, %92 - br label %97 +84: ; preds = %82, %76 + %85 = phi i64 [ %83, %76 ], [ %.sroa.speculated138, %72 ] + store i64 %85, ptr %0, align 8, !tbaa !382 + %.pre = load i64, ptr %1, align 8, !tbaa !382 + br label %86 -94: ; preds = %84 - %95 = shl i64 %.sroa.speculated140, 5 - %96 = udiv i64 4718592, %95 - br label %97 +94: ; preds = %84, %67 + %87 = phi i64 [ %85, %84 ], [ %65, %67 ] + %88 = phi i64 [ %.pre, %84 ], [ %62, %67 ] + %89 = shl i64 %88, 3 + %90 = mul i64 %89, %87 + %91 = sub nsw i64 %68, %90 + %92 = shl nsw i64 %87, 5 + %.not = icmp slt i64 %91, %92 + br i1 %.not, label %96, label %93 + +93: ; preds = %86 + %94 = shl i64 %87, 3 + %95 = udiv i64 %91, %94 + br label %99 + +96: ; preds = %86 + %97 = shl i64 %.sroa.speculated140, 5 + %98 = udiv i64 4718592, %97 + br label %99 + +99: ; preds = %96, %93 + %storemerge = phi i64 [ %98, %96 ], [ %95, %93 ] + %100 = shl i64 %87, 4 + %101 = udiv i64 1572864, %100 + %.sroa.speculated135 = tail call i64 @llvm.smin.i64(i64 %storemerge, i64 %101) + %102 = and i64 %.sroa.speculated135, -4 + %103 = load i64, ptr %2, align 8, !tbaa !382 + %104 = icmp sgt i64 %103, %102 + br i1 %104, label %105, label %118 + +105: ; preds = %99 + %106 = srem i64 %103, %102 + %107 = sdiv i64 %103, %102 + %108 = icmp eq i64 %106, 0 + br i1 %108, label %116, label %109 + +109: ; preds = %105 + %110 = sub nsw i64 %102, %106 + %111 = shl i64 %107, 2 + %112 = add i64 %111, 4 + %113 = sdiv i64 %110, %112 + %114 = shl nsw i64 %113, 2 + %115 = sub nsw i64 %102, %114 + br label %116 -97: ; preds = %94, %91 - %storemerge = phi i64 [ %96, %94 ], [ %93, %91 ] - %98 = shl i64 %85, 4 - %99 = udiv i64 1572864, %98 - %.sroa.speculated135 = tail call i64 @llvm.smin.i64(i64 %storemerge, i64 %99) - %100 = and i64 %.sroa.speculated135, -4 - %101 = load i64, ptr %2, align 8, !tbaa !382 - %102 = icmp sgt i64 %101, %100 - br i1 %102, label %103, label %116 - -103: ; preds = %97 - %104 = srem i64 %101, %100 - %105 = sdiv i64 %101, %100 - %106 = icmp eq i64 %104, 0 - br i1 %106, label %114, label %107 - -107: ; preds = %103 - %108 = sub nsw i64 %100, %104 - %109 = shl i64 %105, 2 - %110 = add i64 %109, 4 - %111 = sdiv i64 %108, %110 - %112 = shl nsw i64 %111, 2 - %113 = sub nsw i64 %100, %112 - br label %114 - -114: ; preds = %103, %107 - %115 = phi i64 [ %113, %107 ], [ %100, %103 ] - store i64 %115, ptr %2, align 8, !tbaa !382 +97: ; preds = %105, %109 + %storemerge = phi i64 [ %115, %109 ], [ %102, %105 ] + store i64 %117, ptr %2, align 8, !tbaa !382 br label %.critedge116 -116: ; preds = %97 - %117 = icmp eq i64 %63, %85 +116: ; preds = %99 + %117 = icmp eq i64 %63, %87 br i1 %117, label %118, label %.critedge116 118: ; preds = %116 %119 = shl i64 %63, 3 - %120 = mul i64 %119, %101 + %120 = mul i64 %119, %103 %121 = icmp slt i64 %120, 1025 br i1 %121, label %126, label %122 @@ -8368,12 +8368,12 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br i1 %or.cond4, label %125, label %126 125: ; preds = %122 - %.sroa.speculated128 = tail call i64 @llvm.smin.i64(i64 %86, i64 576) + %.sroa.speculated128 = tail call i64 @llvm.smin.i64(i64 %88, i64 576) br label %126 126: ; preds = %118, %122, %125 - %.0182 = phi i64 [ %.sroa.speculated128, %125 ], [ %86, %122 ], [ %86, %118 ] - %.093 = phi i64 [ %14, %125 ], [ 1572864, %122 ], [ %13, %118 ] + %.0182 = phi i64 [ %.sroa.speculated128, %127 ], [ %88, %124 ], [ %88, %120 ] + %.093 = phi i64 [ %14, %127 ], [ 1572864, %124 ], [ %13, %120 ] %127 = mul i64 %63, 24 %128 = udiv i64 %.093, %127 %.sroa.speculated = tail call i64 @llvm.smin.i64(i64 %.0182, i64 %128) @@ -8389,9 +8389,9 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br i1 %133, label %.critedge116, label %134 134: ; preds = %132, %130 - %.0 = phi i64 [ %131, %130 ], [ %.sroa.speculated, %132 ] - %135 = srem i64 %86, %.0 - %136 = sdiv i64 %86, %.0 + %.0 = phi i64 [ %131, %132 ], [ %.sroa.speculated, %134 ] + %135 = srem i64 %88, %.0 + %136 = sdiv i64 %88, %.0 %137 = icmp eq i64 %135, 0 br i1 %137, label %.critedge, label %138 @@ -8405,11 +8405,11 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br label %.critedge .critedge: ; preds = %134, %138 - %145 = phi i64 [ %144, %138 ], [ %.0, %134 ] + %145 = phi i64 [ %144, %140 ], [ %.0, %136 ] store i64 %145, ptr %1, align 8, !tbaa !382 br label %.critedge116 -.critedge116: ; preds = %.critedge, %116, %114, %132, %53, %55, %40, %59 +.critedge116: ; preds = %.critedge, %116, %116, %132, %53, %55, %42, %59 ret void } diff --git a/bench/cpython/optimized/socketmodule.ll b/bench/cpython/optimized/socketmodule.ll index 2f272ca8e75..b3978923204 100644 --- a/bench/cpython/optimized/socketmodule.ll +++ b/bench/cpython/optimized/socketmodule.ll @@ -9704,9 +9704,9 @@ define internal fastcc range(i32 -1, 1) i32 @sock_call_ex(ptr noundef %0, i32 no %17 = getelementptr inbounds nuw i8, ptr %0, i64 40 br i1 %or.cond, label %.split92.us, label %.split92 -.split92.us: ; preds = %7, %.split92.us.backedge - %.045.us = phi i64 [ %.348.us, %.split92.us.backedge ], [ 0, %7 ] - %.043.us = phi i32 [ %.3.us, %.split92.us.backedge ], [ 0, %7 ] +.split92.us: ; preds = %7, %.split91.us.backedge + %.045.us = phi i64 [ %.348.us, %.split91.us.backedge ], [ 0, %7 ] + %.043.us = phi i32 [ %.3.us, %.split91.us.backedge ], [ 0, %7 ] br i1 %10, label %28, label %18 18: ; preds = %.split92.us @@ -9719,8 +9719,8 @@ define internal fastcc range(i32 -1, 1) i32 @sock_call_ex(ptr noundef %0, i32 no store i32 %.val64.us, ptr %8, align 4, !tbaa !196 store i16 %spec.select.i67, ptr %14, align 4, !tbaa !198 %21 = call i64 @_PyTime_AsMilliseconds(i64 noundef %6, i32 noundef 1) #13 - %spec.store.select.i68.us = call i64 @llvm.smin.i64(i64 %21, i64 2147483647) - %spec.store.select1.i69.us = call i64 @llvm.smax.i64(i64 %spec.store.select.i68.us, i64 -1) + %spec.store.select.i68.us = call i64 @llvm.smax.i64(i64 %21, i64 -1) + %spec.store.select1.i69.us = call i64 @llvm.smin.i64(i64 %spec.store.select.i68.us, i64 2147483647) %22 = call ptr @PyEval_SaveThread() #13 %23 = trunc nsw i64 %spec.store.select1.i69.us to i32 %24 = call i32 @poll(ptr noundef nonnull %8, i64 noundef 1, i32 noundef %23) #13 @@ -9734,7 +9734,7 @@ define internal fastcc range(i32 -1, 1) i32 @sock_call_ex(ptr noundef %0, i32 no br label %internal_select.exit72.us internal_select.exit72.us: ; preds = %26, %20, %18 - %.0.i71.us = phi i32 [ 0, %18 ], [ -1, %20 ], [ %..i70.us, %26 ] + %.0.i71.us = phi i32 [ 0, %18 ], [ -1, %20 ], [ %..i70.us, %27 ] call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %8) #13 br label %43 @@ -9752,8 +9752,8 @@ internal_select.exit72.us: ; preds = %26, %20, %18 br label %33 33: ; preds = %.thread.us, %29 - %.04176.us = phi i64 [ %6, %.thread.us ], [ %30, %29 ] - %.24775.us = phi i64 [ %32, %.thread.us ], [ %.045.us, %29 ] + %.04176.us = phi i64 [ %6, %.thread.us ], [ %30, %30 ] + %.24775.us = phi i64 [ %32, %.thread.us ], [ %.045.us, %30 ] %.val.us = load i32, ptr %12, align 8, !tbaa !83 call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %9) #13 %34 = icmp eq i32 %.val.us, -1 @@ -9763,8 +9763,8 @@ internal_select.exit72.us: ; preds = %26, %20, %18 store i32 %.val.us, ptr %9, align 4, !tbaa !196 store i16 %spec.select.i67, ptr %16, align 4, !tbaa !198 %36 = call i64 @_PyTime_AsMilliseconds(i64 noundef %.04176.us, i32 noundef 1) #13 - %spec.store.select.i.us = call i64 @llvm.smin.i64(i64 %36, i64 2147483647) - %spec.store.select1.i.us = call i64 @llvm.smax.i64(i64 %spec.store.select.i.us, i64 -1) + %spec.store.select.i.us = call i64 @llvm.smax.i64(i64 %36, i64 -1) + %spec.store.select1.i.us = call i64 @llvm.smin.i64(i64 %spec.store.select.i.us, i64 2147483647) %37 = call ptr @PyEval_SaveThread() #13 %38 = trunc nsw i64 %spec.store.select1.i.us to i32 %39 = call i32 @poll(ptr noundef nonnull %9, i64 noundef 1, i32 noundef %38) #13 @@ -9778,15 +9778,15 @@ internal_select.exit72.us: ; preds = %26, %20, %18 br label %internal_select.exit.us internal_select.exit.us: ; preds = %41, %35, %33 - %.0.i.us = phi i32 [ 0, %33 ], [ -1, %35 ], [ %..i.us, %41 ] + %.0.i.us = phi i32 [ 0, %34 ], [ -1, %36 ], [ %..i.us, %43 ] call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %9) #13 br label %43 43: ; preds = %internal_select.exit.us, %internal_select.exit72.us - %.348.us = phi i64 [ %.045.us, %internal_select.exit72.us ], [ %.24775.us, %internal_select.exit.us ] - %.3.us = phi i32 [ %.043.us, %internal_select.exit72.us ], [ 1, %internal_select.exit.us ] - %.1.us = phi i32 [ %.0.i71.us, %internal_select.exit72.us ], [ %.0.i.us, %internal_select.exit.us ] - switch i32 %.1.us, label %51 [ + %.348.us = phi i64 [ %.045.us, %internal_select.exit71.us ], [ %.24775.us, %internal_select.exit.us ] + %.3.us = phi i32 [ %.043.us, %internal_select.exit71.us ], [ 1, %internal_select.exit.us ] + %.1.us = phi i32 [ %.0.i71.us, %internal_select.exit71.us ], [ %.0.i.us, %internal_select.exit.us ] + switch i32 %.1.us, label %53 [ i32 -1, label %44 i32 1, label %.thread77 ] @@ -9808,12 +9808,12 @@ internal_select.exit.us: ; preds = %41, %35, %33 49: ; preds = %._crit_edge %50 = call i32 @PyErr_CheckSignals() #13 %.not62.us = icmp eq i32 %50, 0 - br i1 %.not62.us, label %.split92.us.backedge, label %.split101.us + br i1 %.not62.us, label %.split91.us.backedge, label %.split101.us 51: ; preds = %43 br i1 %.not61, label %.split.us.us, label %.split.us97 -.split.us97: ; preds = %51, %58 +.split.us97: ; preds = %51, %60 %52 = call ptr @PyEval_SaveThread() #13 %53 = call i32 %2(ptr noundef %0, ptr noundef %3) #13 call void @PyEval_RestoreThread(ptr noundef %52) #13 @@ -9825,40 +9825,40 @@ internal_select.exit.us: ; preds = %41, %35, %33 %56 = load i32, ptr %55, align 4, !tbaa !35 store i32 %56, ptr %5, align 4, !tbaa !35 %57 = icmp eq i32 %56, 4 - br i1 %57, label %58, label %.split87.us94 - -58: ; preds = %54 - %59 = call i32 @PyErr_CheckSignals() #13 - %.not58.us96 = icmp eq i32 %59, 0 - br i1 %.not58.us96, label %.split.us97, label %.split90.us - -.split87.us94: ; preds = %54, %65 - %.us-phi.us = phi i32 [ %67, %65 ], [ %56, %54 ] - %60 = load i64, ptr %17, align 8, !tbaa !90 - %61 = icmp sgt i64 %60, 0 - %62 = icmp eq i32 %.us-phi.us, 11 - %or.cond63.us = and i1 %62, %61 - br i1 %or.cond63.us, label %.split92.us.backedge, label %.split106.us - -.split92.us.backedge: ; preds = %.split87.us94, %49 - br label %.split92.us - -.split.us.us: ; preds = %51, %69 - %63 = call ptr @PyEval_SaveThread() #13 - %64 = call i32 %2(ptr noundef %0, ptr noundef %3) #13 - call void @PyEval_RestoreThread(ptr noundef %63) #13 - %.not56.us.us = icmp eq i32 %64, 0 - br i1 %.not56.us.us, label %65, label %.split85.us - -65: ; preds = %.split.us.us - %66 = tail call ptr @__errno_location() #14 - %67 = load i32, ptr %66, align 4, !tbaa !35 - %68 = icmp eq i32 %67, 4 - br i1 %68, label %69, label %.split87.us94 + br i1 %57, label %60, label %.split86.us93 -69: ; preds = %65 +69: ; preds = %56 %70 = call i32 @PyErr_CheckSignals() #13 %.not58.us.us = icmp eq i32 %70, 0 + br i1 %.not58.us95, label %.split.us96, label %.split89.us + +.split86.us93: ; preds = %56, %67 + %.us-phi.us = phi i32 [ %69, %67 ], [ %58, %56 ] + %62 = load i64, ptr %17, align 8, !tbaa !90 + %63 = icmp sgt i64 %62, 0 + %64 = icmp eq i32 %.us-phi.us, 11 + %or.cond63.us = and i1 %64, %63 + br i1 %or.cond63.us, label %.split91.us.backedge, label %.split105.us + +.split91.us.backedge: ; preds = %.split86.us93, %51 + br label %.split91.us + +.split.us.us: ; preds = %53, %71 + %65 = call ptr @PyEval_SaveThread() #13 + %66 = call i32 %2(ptr noundef %0, ptr noundef %3) #13 + call void @PyEval_RestoreThread(ptr noundef %65) #13 + %.not56.us.us = icmp eq i32 %66, 0 + br i1 %.not56.us.us, label %67, label %.split84.us + +67: ; preds = %.split.us.us + %68 = tail call ptr @__errno_location() #14 + %69 = load i32, ptr %68, align 4, !tbaa !35 + %70 = icmp eq i32 %69, 4 + br i1 %70, label %71, label %.split86.us93 + +71: ; preds = %67 + %72 = call i32 @PyErr_CheckSignals() #13 + %.not58.us.us = icmp eq i32 %72, 0 br i1 %.not58.us.us, label %.split.us.us, label %.critedge .split92: ; preds = %7 @@ -9902,7 +9902,7 @@ internal_select.exit.us: ; preds = %41, %35, %33 .split99.us: ; preds = %._crit_edge %83 = getelementptr inbounds nuw i8, ptr %0, i64 32 %84 = load ptr, ptr %83, align 8, !tbaa !89 - %85 = call ptr %84() #13 + %85 = call ptr %86() #13 br label %.critedge .thread77: ; preds = %29, %43 @@ -9946,7 +9946,7 @@ internal_select.exit.us: ; preds = %41, %35, %33 .split.backedge: ; preds = %95, %.split87 br label %.split -.split90.us: ; preds = %95, %58 +.split90.us: ; preds = %95, %60 store i32 -1, ptr %5, align 4, !tbaa !35 br label %.critedge @@ -9957,17 +9957,17 @@ internal_select.exit.us: ; preds = %41, %35, %33 %or.cond63 = and i1 %99, %98 br i1 %or.cond63, label %.split.backedge, label %.critedge -.split106.us: ; preds = %.split87.us94 +.split106.us: ; preds = %.split86.us93 br i1 %.not61, label %.critedge122, label %.critedge .critedge122: ; preds = %.split87.us.us116, %.split106.us %100 = getelementptr inbounds nuw i8, ptr %0, i64 32 %101 = load ptr, ptr %100, align 8, !tbaa !89 - %102 = call ptr %101() #13 + %102 = call ptr %103() #13 br label %.critedge -.critedge: ; preds = %.split87, %77, %69, %.split106.us, %.critedge122, %.split90.us, %.split85.us, %.split85.us.thread, %86, %87, %.split101.us, %82, %.split99.us - %.0 = phi i32 [ -1, %.split99.us ], [ -1, %82 ], [ -1, %.split101.us ], [ -1, %87 ], [ -1, %86 ], [ 0, %.split85.us.thread ], [ 0, %.split85.us ], [ -1, %.split90.us ], [ -1, %.critedge122 ], [ -1, %.split106.us ], [ -1, %69 ], [ -1, %77 ], [ -1, %.split87 ] +.critedge: ; preds = %.split87, %77, %71, %.split106.us, %.critedge122, %.split90.us, %.split85.us, %.split85.us.thread, %86, %87, %.split101.us, %82, %.split99.us + %.0 = phi i32 [ -1, %.split98.us ], [ -1, %84 ], [ -1, %.split100.us ], [ -1, %89 ], [ -1, %88 ], [ 0, %.split84.us.thread ], [ 0, %.split84.us ], [ -1, %.split89.us ], [ -1, %.critedge121 ], [ -1, %.split105.us ], [ -1, %71 ], [ -1, %79 ], [ -1, %.split86 ] ret i32 %.0 } diff --git a/bench/darktable/optimized/decoders_libraw.ll b/bench/darktable/optimized/decoders_libraw.ll index 298dd596056..d3a22781bf6 100644 --- a/bench/darktable/optimized/decoders_libraw.ll +++ b/bench/darktable/optimized/decoders_libraw.ll @@ -3535,8 +3535,8 @@ _ZN15iiq_bitstream_t3getEc.exit94: ; preds = %_ZN15iiq_bitstream_ %.sroa.0.5 = phi i64 [ %.pre.i.i.i85155, %_ZN15iiq_bitstream_t3getEc.exit87 ], [ %.sroa.0.20, %_ZN15iiq_bitstream_t3getEc.exit94 ] %.045 = phi i32 [ %165, %_ZN15iiq_bitstream_t3getEc.exit87 ], [ %200, %_ZN15iiq_bitstream_t3getEc.exit94 ] %202 = shl i32 %.045, 2 - %203 = tail call i32 @llvm.smin.i32(i32 %202, i32 65535) - %204 = tail call i32 @llvm.smax.i32(i32 %203, i32 0) + %203 = tail call i32 @llvm.smax.i32(i32 %202, i32 0) + %204 = tail call i32 @llvm.umin.i32(i32 %203, i32 65535) %205 = trunc nuw i32 %204 to i16 %206 = getelementptr inbounds nuw i16, ptr %.1177, i64 %indvars.iv store i16 %205, ptr %206, align 2, !tbaa !80 @@ -4355,9 +4355,6 @@ declare i32 @llvm.bswap.i32(i32) #12 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i8 @llvm.fshl.i8(i8, i8, i8) #12 -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smin.i32(i32, i32) #12 - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i16 @llvm.umax.i16(i16, i16) #12 diff --git a/bench/darktable/optimized/identify.ll b/bench/darktable/optimized/identify.ll index 6193f4952dc..b51f8d257bc 100644 --- a/bench/darktable/optimized/identify.ll +++ b/bench/darktable/optimized/identify.ll @@ -8385,18 +8385,19 @@ define void @_ZN6LibRaw27identify_process_dng_fieldsEv(ptr noundef nonnull align ._crit_edge664: ; preds = %.lr.ph663, %.preheader552 %.0323.lcssa = phi i32 [ 0, %.preheader552 ], [ %694, %.lr.ph663 ] - %685 = call i32 @llvm.smax.i32(i32 %invariant.smin660, i32 1) - %686 = sdiv i32 %.0323.lcssa, %685 - %687 = getelementptr inbounds nuw i8, ptr %0, i64 136592 - %688 = load i32, ptr %687, align 8, !tbaa !92 - %689 = getelementptr inbounds nuw i8, ptr %0, i64 136596 - %690 = load i32, ptr %689, align 4, !tbaa !92 - %691 = mul i32 %690, %688 - %.not489 = icmp eq i32 %691, 0 - br i1 %.not489, label %701, label %.preheader + %685 = call i32 @llvm.smax.i32(i32 %683, i32 1) + %686 = call i32 @llvm.umin.i32(i32 %685, i32 4) + %687 = sdiv i32 %.0323.lcssa, %686 + %688 = getelementptr inbounds nuw i8, ptr %0, i64 136592 + %689 = load i32, ptr %688, align 8, !tbaa !92 + %690 = getelementptr inbounds nuw i8, ptr %0, i64 136596 + %691 = load i32, ptr %690, align 4, !tbaa !92 + %692 = mul i32 %691, %689 + %.not489 = icmp eq i32 %692, 0 + br i1 %.not489, label %702, label %.preheader .preheader: ; preds = %._crit_edge664 - %invariant.umin667 = call i32 @llvm.umin.i32(i32 %691, i32 4096) + %invariant.umin667 = call i32 @llvm.umin.i32(i32 %692, i32 4096) %wide.trip.count800 = zext nneg i32 %invariant.umin667 to i64 br label %696 @@ -8415,8 +8416,8 @@ define void @_ZN6LibRaw27identify_process_dng_fieldsEv(ptr noundef nonnull align br label %701 696: ; preds = %.preheader, %696 - %indvars.iv795 = phi i64 [ 0, %.preheader ], [ %indvars.iv.next796, %696 ] - %.1322668 = phi i32 [ 0, %.preheader ], [ %700, %696 ] + %indvars.iv795 = phi i64 [ 0, %.preheader ], [ %indvars.iv.next796, %697 ] + %.1322668 = phi i32 [ 0, %.preheader ], [ %700, %697 ] %697 = add nuw nsw i64 %indvars.iv795, 6 %698 = getelementptr inbounds nuw [4104 x i32], ptr %656, i64 0, i64 %697 %699 = load i32, ptr %698, align 4, !tbaa !92 @@ -8430,7 +8431,7 @@ define void @_ZN6LibRaw27identify_process_dng_fieldsEv(ptr noundef nonnull align br i1 %684, label %.lr.ph675, label %._crit_edge676 .lr.ph675: ; preds = %701 - %702 = add i32 %.0321, %686 + %702 = add i32 %.0321, %687 %703 = add i32 %702, %655 %704 = sub i32 %522, %703 %705 = uitofp i32 %704 to float @@ -8449,7 +8450,7 @@ define void @_ZN6LibRaw27identify_process_dng_fieldsEv(ptr noundef nonnull align br i1 %.not490, label %719, label %715 713: ; preds = %.lr.ph675, %713 - %indvars.iv802 = phi i64 [ 0, %.lr.ph675 ], [ %indvars.iv.next803, %713 ] + %indvars.iv802 = phi i64 [ 0, %.lr.ph675 ], [ %indvars.iv.next803, %714 ] %714 = getelementptr inbounds nuw [4 x i64], ptr %710, i64 0, i64 %indvars.iv802 store i64 %709, ptr %714, align 8, !tbaa !259 %indvars.iv.next803 = add nuw nsw i64 %indvars.iv802, 1 diff --git a/bench/darktable/optimized/kodak_decoders.ll b/bench/darktable/optimized/kodak_decoders.ll index a082a94489d..7d20efa56f5 100644 --- a/bench/darktable/optimized/kodak_decoders.ll +++ b/bench/darktable/optimized/kodak_decoders.ll @@ -1572,8 +1572,8 @@ _ZNSt6vectorIhSaIhEED2Ev.exit: ; preds = %41, %35, %25, %24 %indvars.iv = phi i64 [ 0, %53 ], [ %indvars.iv.next, %71 ] %72 = getelementptr inbounds nuw [3 x i32], ptr %2, i64 0, i64 %indvars.iv %73 = load i32, ptr %72, align 4, !tbaa !142 - %narrow = tail call i32 @llvm.smin.i32(i32 %73, i32 255) - %narrow43 = tail call i32 @llvm.smax.i32(i32 %narrow, i32 0) + %narrow = tail call i32 @llvm.smax.i32(i32 %73, i32 0) + %narrow43 = tail call i32 @llvm.umin.i32(i32 %narrow, i32 255) %74 = zext nneg i32 %narrow43 to i64 %75 = getelementptr inbounds nuw [65536 x i16], ptr %23, i64 0, i64 %74 %76 = load i16, ptr %75, align 2, !tbaa !75 @@ -1756,8 +1756,8 @@ _ZNSt6vectorIhSaIhEEC2EmRKS0_.exit: ; preds = %.noexc, %7 %indvars.iv61 = phi i64 [ 0, %.lr.ph.split.us ], [ %indvars.iv.next62, %75 ] %76 = getelementptr inbounds nuw [3 x i32], ptr %2, i64 0, i64 %indvars.iv61 %77 = load i32, ptr %76, align 4, !tbaa !142 - %narrow.us = tail call i32 @llvm.smin.i32(i32 %77, i32 255) - %narrow47.us = tail call i32 @llvm.smax.i32(i32 %narrow.us, i32 0) + %narrow.us = tail call i32 @llvm.smax.i32(i32 %77, i32 0) + %narrow47.us = tail call i32 @llvm.umin.i32(i32 %narrow.us, i32 255) %78 = zext nneg i32 %narrow47.us to i64 %79 = getelementptr inbounds nuw [65536 x i16], ptr %23, i64 0, i64 %78 %80 = load i16, ptr %79, align 2, !tbaa !75 @@ -1773,8 +1773,8 @@ _ZNSt6vectorIhSaIhEEC2EmRKS0_.exit: ; preds = %.noexc, %7 br i1 %exitcond64.not, label %70, label %75, !llvm.loop !151 .lr.ph.split: ; preds = %.lr.ph, %119 - %indvars.iv58 = phi i64 [ %indvars.iv.next59, %119 ], [ 0, %.lr.ph ] - %87 = phi i32 [ %121, %119 ], [ %45, %.lr.ph ] + %indvars.iv58 = phi i64 [ %indvars.iv.next59, %123 ], [ 0, %.lr.ph ] + %87 = phi i32 [ %121, %123 ], [ %45, %.lr.ph ] %88 = getelementptr inbounds nuw i8, ptr %.sroa.037.0, i64 %indvars.iv58 %89 = load i8, ptr %88, align 1, !tbaa !79 %90 = zext i8 %89 to i32 @@ -1802,11 +1802,11 @@ _ZNSt6vectorIhSaIhEEC2EmRKS0_.exit: ; preds = %.noexc, %7 br label %107 107: ; preds = %.lr.ph.split, %107 - %indvars.iv = phi i64 [ 0, %.lr.ph.split ], [ %indvars.iv.next, %107 ] + %indvars.iv = phi i64 [ 0, %.lr.ph.split ], [ %indvars.iv.next, %109 ] %108 = getelementptr inbounds nuw [3 x i32], ptr %2, i64 0, i64 %indvars.iv %109 = load i32, ptr %108, align 4, !tbaa !142 - %narrow = tail call i32 @llvm.smin.i32(i32 %109, i32 255) - %narrow47 = tail call i32 @llvm.smax.i32(i32 %narrow, i32 0) + %narrow = tail call i32 @llvm.smax.i32(i32 %109, i32 0) + %narrow47 = tail call i32 @llvm.umin.i32(i32 %narrow, i32 255) %110 = zext nneg i32 %narrow47 to i64 %111 = getelementptr inbounds nuw [65536 x i16], ptr %23, i64 0, i64 %110 %112 = load i16, ptr %111, align 2, !tbaa !75 @@ -2652,8 +2652,8 @@ define void @_ZN6LibRaw20kodak_ycbcr_load_rawEv(ptr noundef nonnull align 8 dere %74 = getelementptr inbounds nuw [3 x i32], ptr %4, i64 0, i64 %indvars.iv %75 = load i32, ptr %74, align 4, !tbaa !142 %76 = add nsw i32 %75, %60 - %narrow = tail call i32 @llvm.smin.i32(i32 %76, i32 4095) - %narrow73 = tail call i32 @llvm.smax.i32(i32 %narrow, i32 0) + %narrow = tail call i32 @llvm.smax.i32(i32 %76, i32 0) + %narrow73 = tail call i32 @llvm.umin.i32(i32 %narrow, i32 4095) %77 = zext nneg i32 %narrow73 to i64 %78 = getelementptr inbounds nuw [65536 x i16], ptr %25, i64 0, i64 %77 %79 = load i16, ptr %78, align 2, !tbaa !75 @@ -3029,9 +3029,6 @@ declare i64 @llvm.smin.i64(i64, i64) #11 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.umax.i32(i32, i32) #11 -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smin.i32(i32, i32) #11 - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.umin.i32(i32, i32) #11 diff --git a/bench/darktable/optimized/load_mfbacks.ll b/bench/darktable/optimized/load_mfbacks.ll index 51015de0280..85f766e5275 100644 --- a/bench/darktable/optimized/load_mfbacks.ll +++ b/bench/darktable/optimized/load_mfbacks.ll @@ -1944,8 +1944,8 @@ _ZNSt6vectorIjSaIjEE9push_backERKj.exit: ; preds = %129, %199, %200, %_ %489 = uitofp i16 %488 to float %490 = fmul reassoc nsz arcp contract afn float %483, %489 %491 = fptosi float %490 to i32 - %492 = call i32 @llvm.smin.i32(i32 %491, i32 65535) - %493 = call i32 @llvm.smax.i32(i32 %492, i32 0) + %492 = call i32 @llvm.smax.i32(i32 %491, i32 0) + %493 = call i32 @llvm.umin.i32(i32 %492, i32 65535) %494 = trunc nuw i32 %493 to i16 store i16 %494, ptr %487, align 2, !tbaa !74 %495 = add nuw nsw i32 %storemerge316692, 1 @@ -2658,8 +2658,8 @@ _ZN6LibRaw27phase_one_fix_col_pixel_avgEjj.exit: ; preds = %634, %656 %801 = fadd reassoc nsz arcp contract afn float %800, %751 %802 = fmul reassoc nsz arcp contract afn float %801, 2.000000e+00 %803 = fptosi float %802 to i32 - %804 = call i32 @llvm.smin.i32(i32 %803, i32 65535) - %805 = call i32 @llvm.smax.i32(i32 %804, i32 0) + %804 = call i32 @llvm.smax.i32(i32 %803, i32 0) + %805 = call i32 @llvm.umin.i32(i32 %804, i32 65535) %806 = trunc nuw i32 %805 to i16 store i16 %806, ptr %747, align 2, !tbaa !74 %807 = add nuw nsw i32 %storemerge745.us.us, 1 @@ -2748,8 +2748,8 @@ _ZN6LibRaw27phase_one_fix_col_pixel_avgEjj.exit: ; preds = %634, %656 %851 = fadd reassoc nsz arcp contract afn float %850, %844 %852 = fmul reassoc nsz arcp contract afn float %851, 2.000000e+00 %853 = fptosi float %852 to i32 - %854 = call i32 @llvm.smin.i32(i32 %853, i32 65535) - %855 = call i32 @llvm.smax.i32(i32 %854, i32 0) + %854 = call i32 @llvm.smax.i32(i32 %853, i32 0) + %855 = call i32 @llvm.umin.i32(i32 %854, i32 65535) %856 = trunc nuw i32 %855 to i16 store i16 %856, ptr %835, align 2, !tbaa !74 %857 = add nuw nsw i32 %storemerge745, 1 @@ -4936,9 +4936,6 @@ declare i32 @llvm.umin.i32(i32, i32) #17 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.umax.i32(i32, i32) #17 -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smin.i32(i32, i32) #17 - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #17 diff --git a/bench/faiss/optimized/AdditiveQuantizer.ll b/bench/faiss/optimized/AdditiveQuantizer.ll index ba03de31c6f..9f8b7b1974d 100644 --- a/bench/faiss/optimized/AdditiveQuantizer.ll +++ b/bench/faiss/optimized/AdditiveQuantizer.ll @@ -1858,8 +1858,8 @@ define noundef range(i64 0, 4294967296) i64 @_ZNK5faiss17AdditiveQuantizer11enco %20 = fmul float %19, 2.560000e+02 %21 = tail call float @llvm.floor.f32(float %20) %22 = fptosi float %21 to i32 - %23 = tail call i32 @llvm.smin.i32(i32 %22, i32 255) - %24 = tail call i32 @llvm.smax.i32(i32 %23, i32 0) + %23 = tail call i32 @llvm.smax.i32(i32 %22, i32 0) + %24 = tail call i32 @llvm.umin.i32(i32 %23, i32 255) %25 = zext nneg i32 %24 to i64 br label %48 @@ -1874,8 +1874,8 @@ define noundef range(i64 0, 4294967296) i64 @_ZNK5faiss17AdditiveQuantizer11enco %34 = fmul float %33, 1.600000e+01 %35 = tail call float @llvm.floor.f32(float %34) %36 = fptosi float %35 to i32 - %37 = tail call i32 @llvm.smin.i32(i32 %36, i32 15) - %38 = tail call i32 @llvm.smax.i32(i32 %37, i32 0) + %37 = tail call i32 @llvm.smax.i32(i32 %36, i32 0) + %38 = tail call i32 @llvm.umin.i32(i32 %37, i32 15) %39 = zext nneg i32 %38 to i64 br label %48 @@ -5477,9 +5477,6 @@ declare void @llvm.assume(i1 noundef) #23 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.floor.f32(float) #24 -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smin.i32(i32, i32) #24 - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #24 @@ -5492,6 +5489,9 @@ declare i64 @llvm.smin.i64(i64, i64) #24 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.umax.i64(i64, i64) #24 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #24 + attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { cold mustprogress noreturn nounwind memory(inaccessiblemem: write) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } diff --git a/bench/freetype/optimized/sdf.ll b/bench/freetype/optimized/sdf.ll index ffab7c492d2..2791c0706bf 100644 --- a/bench/freetype/optimized/sdf.ll +++ b/bench/freetype/optimized/sdf.ll @@ -3086,7 +3086,7 @@ get_control_box.exit.i: ; preds = %.critedge4.i.i, %.l br i1 %221, label %.lr.ph.us.i, label %._crit_edge229.i, !llvm.loop !162 .lr.ph.split.us.us.i: ; preds = %756, %.lr.ph.split.us.us.preheader.i - %indvars.iv.i = phi i64 [ %212, %.lr.ph.split.us.us.preheader.i ], [ %indvars.iv.next.i, %756 ] + %indvars.iv.i = phi i64 [ %212, %.lr.ph.split.us.us.preheader.i ], [ %indvars.iv.next.i, %758 ] %222 = icmp sgt i64 %indvars.iv.i, -1 %.not166.us.us.i = icmp slt i64 %indvars.iv.i, %117 %or.cond170.us.us.i = and i1 %222, %.not166.us.us.i @@ -3704,8 +3704,8 @@ get_min_distance_conic.exit.i.us.us.i: ; preds = %623, %585 %663 = ashr exact i64 %sext39.i.i.us.us.i, 32 %664 = call i64 @FT_DivFix(i64 noundef %662, i64 noundef %663) #11 %665 = trunc i64 %664 to i32 - %spec.store.select.i.i.us.us.i = call i32 @llvm.smin.i32(i32 %665, i32 65536) - %spec.store.select4.i.i.us.us.i = call i32 @llvm.smax.i32(i32 %spec.store.select.i.i.us.us.i, i32 0) + %spec.store.select.i.i.us.us.i = call i32 @llvm.smax.i32(i32 %665, i32 0) + %spec.store.select4.i.i.us.us.i = call i32 @llvm.umin.i32(i32 %spec.store.select.i.i.us.us.i, i32 65536) %.tr.i.i.us.us.i = trunc i64 %648 to i32 %666 = shl i32 %.tr.i.i.us.us.i, 10 %667 = sext i32 %666 to i64 @@ -3790,7 +3790,7 @@ get_min_distance_conic.exit.i.us.us.i: ; preds = %623, %585 br label %get_min_distance_line.exit.i.us.us.i get_min_distance_line.exit.i.us.us.i: ; preds = %708, %647 - %.sink.i.i.us.us.i = phi i32 [ %731, %708 ], [ 65536, %647 ] + %.sink.i.i.us.us.i = phi i32 [ %731, %710 ], [ 65536, %647 ] call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %13) #11 call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %12) #11 br label %732 diff --git a/bench/g2o/optimized/edge_se2_lotsofxy.ll b/bench/g2o/optimized/edge_se2_lotsofxy.ll index f133f1b8d3c..6bdb1ca5acc 100644 --- a/bench/g2o/optimized/edge_se2_lotsofxy.ll +++ b/bench/g2o/optimized/edge_se2_lotsofxy.ll @@ -6644,8 +6644,8 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, 17: ; preds = %_ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit %18 = add nsw i64 %13, -128 %19 = sdiv i64 %18, 64 - %.sroa.speculated157 = tail call i64 @llvm.smin.i64(i64 %19, i64 320) - %.sroa.speculated163 = tail call i64 @llvm.smax.i64(i64 %.sroa.speculated157, i64 8) + %.sroa.speculated157 = tail call i64 @llvm.smax.i64(i64 %19, i64 8) + %.sroa.speculated163 = tail call i64 @llvm.umin.i64(i64 %.sroa.speculated157, i64 320) %20 = load i64, ptr %0, align 8, !tbaa !131 %21 = icmp slt i64 %.sroa.speculated163, %20 br i1 %21, label %22, label %24 @@ -6656,7 +6656,7 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br label %24 24: ; preds = %22, %17 - %25 = phi i64 [ %23, %22 ], [ %20, %17 ] + %25 = phi i64 [ %23, %24 ], [ %20, %17 ] %26 = sub nsw i64 %14, %13 %27 = shl i64 %25, 5 %28 = udiv i64 %26, %27 @@ -6680,7 +6680,7 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br label %40 40: ; preds = %36, %33 - %storemerge184 = phi i64 [ %35, %33 ], [ %.sroa.speculated153, %36 ] + %storemerge184 = phi i64 [ %35, %35 ], [ %.sroa.speculated153, %38 ] store i64 %storemerge184, ptr %2, align 8, !tbaa !131 %41 = icmp sgt i64 %15, %14 br i1 %41, label %42, label %.critedge116 @@ -6727,7 +6727,7 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, %68 = and i64 %67, -8 %.sroa.speculated140 = tail call i64 @llvm.smax.i64(i64 %68, i64 1) %69 = icmp sgt i64 %63, %.sroa.speculated140 - br i1 %69, label %70, label %84 + br i1 %69, label %70, label %86 70: ; preds = %65 %71 = urem i64 %63, %.sroa.speculated140 @@ -6746,68 +6746,68 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br label %82 82: ; preds = %70, %74 - %83 = phi i64 [ %81, %74 ], [ %.sroa.speculated140, %70 ] + %83 = phi i64 [ %81, %76 ], [ %.sroa.speculated140, %72 ] store i64 %83, ptr %0, align 8, !tbaa !131 %.pre = load i64, ptr %1, align 8, !tbaa !131 - br label %84 - -84: ; preds = %82, %65 - %85 = phi i64 [ %83, %82 ], [ %63, %65 ] - %86 = phi i64 [ %.pre, %82 ], [ %60, %65 ] - %87 = shl i64 %86, 3 - %88 = mul i64 %87, %85 - %89 = sub nsw i64 %66, %88 - %90 = shl nsw i64 %85, 5 - %.not = icmp slt i64 %89, %90 - br i1 %.not, label %94, label %91 - -91: ; preds = %84 - %92 = shl i64 %85, 3 - %93 = udiv i64 %89, %92 - br label %97 - -94: ; preds = %84 - %95 = shl i64 %.sroa.speculated140, 5 - %96 = udiv i64 4718592, %95 - br label %97 - -97: ; preds = %94, %91 - %storemerge = phi i64 [ %96, %94 ], [ %93, %91 ] - %98 = shl i64 %85, 4 - %99 = udiv i64 1572864, %98 - %.sroa.speculated135 = tail call i64 @llvm.smin.i64(i64 %storemerge, i64 %99) - %100 = and i64 %.sroa.speculated135, -4 - %101 = load i64, ptr %2, align 8, !tbaa !131 - %102 = icmp sgt i64 %101, %100 - br i1 %102, label %103, label %116 - -103: ; preds = %97 - %104 = srem i64 %101, %100 - %105 = sdiv i64 %101, %100 - %106 = icmp eq i64 %104, 0 - br i1 %106, label %114, label %107 - -107: ; preds = %103 - %108 = sub nsw i64 %100, %104 - %109 = shl i64 %105, 2 - %110 = add i64 %109, 4 - %111 = sdiv i64 %108, %110 - %112 = shl nsw i64 %111, 2 - %113 = sub nsw i64 %100, %112 - br label %114 - -114: ; preds = %103, %107 - %115 = phi i64 [ %113, %107 ], [ %100, %103 ] - store i64 %115, ptr %2, align 8, !tbaa !131 + br label %86 + +86: ; preds = %84, %67 + %87 = phi i64 [ %85, %84 ], [ %65, %67 ] + %88 = phi i64 [ %.pre, %84 ], [ %62, %67 ] + %89 = shl i64 %88, 3 + %90 = mul i64 %89, %87 + %91 = sub nsw i64 %68, %90 + %92 = shl nsw i64 %87, 5 + %.not = icmp slt i64 %91, %92 + br i1 %.not, label %96, label %93 + +93: ; preds = %86 + %94 = shl i64 %87, 3 + %95 = udiv i64 %91, %94 + br label %99 + +96: ; preds = %86 + %97 = shl i64 %.sroa.speculated140, 5 + %98 = udiv i64 4718592, %97 + br label %99 + +99: ; preds = %96, %93 + %storemerge = phi i64 [ %98, %96 ], [ %95, %93 ] + %100 = shl i64 %87, 4 + %101 = udiv i64 1572864, %100 + %.sroa.speculated135 = tail call i64 @llvm.smin.i64(i64 %storemerge, i64 %101) + %102 = and i64 %.sroa.speculated135, -4 + %103 = load i64, ptr %2, align 8, !tbaa !131 + %104 = icmp sgt i64 %103, %102 + br i1 %104, label %105, label %118 + +105: ; preds = %99 + %106 = srem i64 %103, %102 + %107 = sdiv i64 %103, %102 + %108 = icmp eq i64 %106, 0 + br i1 %108, label %116, label %109 + +84: ; preds = %105 + %110 = sub nsw i64 %102, %106 + %111 = shl i64 %107, 2 + %112 = add i64 %111, 4 + %113 = sdiv i64 %110, %112 + %114 = shl nsw i64 %113, 2 + %115 = sub nsw i64 %102, %114 + br label %116 + +116: ; preds = %105, %109 + %117 = phi i64 [ %115, %109 ], [ %102, %105 ] + store i64 %117, ptr %2, align 8, !tbaa !131 br label %.critedge116 -116: ; preds = %97 - %117 = icmp eq i64 %63, %85 +116: ; preds = %99 + %117 = icmp eq i64 %63, %87 br i1 %117, label %118, label %.critedge116 118: ; preds = %116 %119 = shl i64 %63, 3 - %120 = mul i64 %119, %101 + %120 = mul i64 %119, %103 %121 = icmp slt i64 %120, 1025 br i1 %121, label %126, label %122 @@ -6818,12 +6818,12 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br i1 %or.cond4, label %125, label %126 125: ; preds = %122 - %.sroa.speculated128 = tail call i64 @llvm.smin.i64(i64 %86, i64 576) + %.sroa.speculated128 = tail call i64 @llvm.smin.i64(i64 %88, i64 576) br label %126 126: ; preds = %118, %122, %125 - %.0182 = phi i64 [ %.sroa.speculated128, %125 ], [ %86, %122 ], [ %86, %118 ] - %.093 = phi i64 [ %14, %125 ], [ 1572864, %122 ], [ %13, %118 ] + %.0182 = phi i64 [ %.sroa.speculated128, %127 ], [ %88, %124 ], [ %88, %120 ] + %.093 = phi i64 [ %14, %127 ], [ 1572864, %124 ], [ %13, %120 ] %127 = mul i64 %63, 24 %128 = udiv i64 %.093, %127 %.sroa.speculated = tail call i64 @llvm.smin.i64(i64 %.0182, i64 %128) @@ -6839,9 +6839,9 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br i1 %133, label %.critedge116, label %134 134: ; preds = %132, %130 - %.0 = phi i64 [ %131, %130 ], [ %.sroa.speculated, %132 ] - %135 = srem i64 %86, %.0 - %136 = sdiv i64 %86, %.0 + %.0 = phi i64 [ %131, %132 ], [ %.sroa.speculated, %134 ] + %135 = srem i64 %88, %.0 + %136 = sdiv i64 %88, %.0 %137 = icmp eq i64 %135, 0 br i1 %137, label %.critedge, label %138 @@ -6855,11 +6855,11 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br label %.critedge .critedge: ; preds = %134, %138 - %145 = phi i64 [ %144, %138 ], [ %.0, %134 ] + %145 = phi i64 [ %144, %140 ], [ %.0, %136 ] store i64 %145, ptr %1, align 8, !tbaa !131 br label %.critedge116 -.critedge116: ; preds = %.critedge, %116, %114, %132, %53, %55, %40, %59 +.critedge116: ; preds = %.critedge, %116, %116, %132, %53, %55, %40, %59 ret void } @@ -12118,12 +12118,12 @@ declare i64 @llvm.smax.i64(i64, i64) #31 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.umax.i64(i64, i64) #31 -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memmove.p0.p0.i64(ptr writeonly captures(none), ptr readonly captures(none), i64, i1 immarg) #33 - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.umin.i64(i64, i64) #31 +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memmove.p0.p0.i64(ptr writeonly captures(none), ptr readonly captures(none), i64, i1 immarg) #33 + ; Function Attrs: nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) memory(inaccessiblemem: readwrite) declare noalias noundef ptr @calloc(i64 noundef, i64 noundef) local_unnamed_addr #34 diff --git a/bench/g2o/optimized/edge_se3_calib.ll b/bench/g2o/optimized/edge_se3_calib.ll index 2d14d3ed8d7..ae7e82d8360 100644 --- a/bench/g2o/optimized/edge_se3_calib.ll +++ b/bench/g2o/optimized/edge_se3_calib.ll @@ -5210,8 +5210,8 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, 17: ; preds = %_ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit %18 = add nsw i64 %13, -128 %19 = sdiv i64 %18, 64 - %.sroa.speculated157 = tail call i64 @llvm.smin.i64(i64 %19, i64 320) - %.sroa.speculated163 = tail call i64 @llvm.smax.i64(i64 %.sroa.speculated157, i64 8) + %.sroa.speculated157 = tail call i64 @llvm.smax.i64(i64 %19, i64 8) + %.sroa.speculated163 = tail call i64 @llvm.umin.i64(i64 %.sroa.speculated157, i64 320) %20 = load i64, ptr %0, align 8, !tbaa !176 %21 = icmp slt i64 %.sroa.speculated163, %20 br i1 %21, label %22, label %24 @@ -5222,7 +5222,7 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br label %24 24: ; preds = %22, %17 - %25 = phi i64 [ %23, %22 ], [ %20, %17 ] + %25 = phi i64 [ %23, %24 ], [ %20, %17 ] %26 = sub nsw i64 %14, %13 %27 = shl i64 %25, 5 %28 = udiv i64 %26, %27 @@ -5246,7 +5246,7 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br label %40 40: ; preds = %36, %33 - %storemerge184 = phi i64 [ %35, %33 ], [ %.sroa.speculated153, %36 ] + %storemerge184 = phi i64 [ %35, %35 ], [ %.sroa.speculated153, %38 ] store i64 %storemerge184, ptr %2, align 8, !tbaa !176 %41 = icmp sgt i64 %15, %14 br i1 %41, label %42, label %.critedge116 @@ -5312,14 +5312,14 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br label %82 82: ; preds = %70, %74 - %83 = phi i64 [ %81, %74 ], [ %.sroa.speculated140, %70 ] + %83 = phi i64 [ %81, %76 ], [ %.sroa.speculated140, %72 ] store i64 %83, ptr %0, align 8, !tbaa !176 %.pre = load i64, ptr %1, align 8, !tbaa !176 br label %84 84: ; preds = %82, %65 - %85 = phi i64 [ %83, %82 ], [ %63, %65 ] - %86 = phi i64 [ %.pre, %82 ], [ %60, %65 ] + %85 = phi i64 [ %83, %84 ], [ %63, %67 ] + %86 = phi i64 [ %.pre, %84 ], [ %60, %67 ] %87 = shl i64 %86, 3 %88 = mul i64 %87, %85 %89 = sub nsw i64 %66, %88 @@ -5338,7 +5338,7 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br label %97 97: ; preds = %94, %91 - %storemerge = phi i64 [ %96, %94 ], [ %93, %91 ] + %storemerge = phi i64 [ %96, %96 ], [ %93, %93 ] %98 = shl i64 %85, 4 %99 = udiv i64 1572864, %98 %.sroa.speculated135 = tail call i64 @llvm.smin.i64(i64 %storemerge, i64 %99) @@ -5363,7 +5363,7 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, br label %114 114: ; preds = %103, %107 - %115 = phi i64 [ %113, %107 ], [ %100, %103 ] + %115 = phi i64 [ %113, %109 ], [ %100, %105 ] store i64 %115, ptr %2, align 8, !tbaa !176 br label %.critedge116 @@ -5375,57 +5375,57 @@ _ZN5Eigen8internal20manage_caching_sizesENS_6ActionEPlS2_S2_.exit: ; preds = %4, %119 = shl i64 %63, 3 %120 = mul i64 %119, %101 %121 = icmp slt i64 %120, 1025 - br i1 %121, label %126, label %122 + br i1 %121, label %128, label %122 122: ; preds = %118 %123 = icmp ne i64 %15, 0 %124 = icmp samesign ult i64 %120, 32769 %or.cond4 = and i1 %123, %124 - br i1 %or.cond4, label %125, label %126 + br i1 %or.cond4, label %125, label %128 125: ; preds = %122 %.sroa.speculated128 = tail call i64 @llvm.smin.i64(i64 %86, i64 576) - br label %126 - -126: ; preds = %118, %122, %125 - %.0182 = phi i64 [ %.sroa.speculated128, %125 ], [ %86, %122 ], [ %86, %118 ] - %.093 = phi i64 [ %14, %125 ], [ 1572864, %122 ], [ %13, %118 ] - %127 = mul i64 %63, 24 - %128 = udiv i64 %.093, %127 - %.sroa.speculated = tail call i64 @llvm.smin.i64(i64 %.0182, i64 %128) - %129 = icmp sgt i64 %.sroa.speculated, 4 - br i1 %129, label %130, label %132 - -130: ; preds = %126 - %131 = and i64 %.sroa.speculated, 9223372036854775804 - br label %134 + br label %128 -132: ; preds = %126 - %133 = icmp eq i64 %.sroa.speculated, 0 - br i1 %133, label %.critedge116, label %134 - -134: ; preds = %132, %130 - %.0 = phi i64 [ %131, %130 ], [ %.sroa.speculated, %132 ] - %135 = srem i64 %86, %.0 - %136 = sdiv i64 %86, %.0 - %137 = icmp eq i64 %135, 0 - br i1 %137, label %.critedge, label %138 - -138: ; preds = %134 - %139 = sub nsw i64 %.0, %135 - %140 = shl i64 %136, 2 - %141 = add i64 %140, 4 - %142 = sdiv i64 %139, %141 - %143 = shl nsw i64 %142, 2 - %144 = sub nsw i64 %.0, %143 +132: ; preds = %120, %124, %127 + %.0182 = phi i64 [ %.sroa.speculated128, %127 ], [ %88, %124 ], [ %88, %120 ] + %.093 = phi i64 [ %14, %127 ], [ 1572864, %124 ], [ %13, %120 ] + %129 = mul i64 %65, 24 + %130 = udiv i64 %.093, %129 + %.sroa.speculated = tail call i64 @llvm.smin.i64(i64 %.0182, i64 %130) + %131 = icmp sgt i64 %.sroa.speculated, 4 + br i1 %131, label %132, label %134 + +132:; preds = %128 + %133 = and i64 %.sroa.speculated, 9223372036854775804 + br label %136 + +134:; preds = %132 + %135 = icmp eq i64 %.sroa.speculated, 0 + br i1 %135, label %.critedge116, label %136 + +136:; preds = %134, %132 + %.0 = phi i64 [ %133, %132 ], [ %.sroa.speculated, %134 ] + %137 = srem i64 %88, %.0 + %138 = sdiv i64 %88, %.0 + %139 = icmp eq i64 %137, 0 + br i1 %139, label %.critedge, label %140 + +140: ; preds = %136 + %141 = sub nsw i64 %.0, %137 + %142 = shl i64 %138, 2 + %143 = add i64 %142, 4 + %144 = sdiv i64 %141, %143 + %145 = shl nsw i64 %144, 2 + %146 = sub nsw i64 %.0, %145 br label %.critedge -.critedge: ; preds = %134, %138 - %145 = phi i64 [ %144, %138 ], [ %.0, %134 ] +.critedge: ; preds = %136, %140 + %145 = phi i64 [ %146, %140 ], [ %.0, %136 ] store i64 %145, ptr %1, align 8, !tbaa !176 br label %.critedge116 -.critedge116: ; preds = %.critedge, %116, %114, %132, %53, %55, %40, %59 +.critedge116: ; preds = %.critedge, %116, %114, %134, %53, %55, %40, %59 ret void } @@ -9429,12 +9429,12 @@ declare i64 @llvm.umax.i64(i64, i64) #25 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.smax.i64(i64, i64) #25 -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memmove.p0.p0.i64(ptr writeonly captures(none), ptr readonly captures(none), i64, i1 immarg) #28 - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.umin.i64(i64, i64) #25 +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memmove.p0.p0.i64(ptr writeonly captures(none), ptr readonly captures(none), i64, i1 immarg) #28 + attributes #0 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+crc32,+cx8,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="generic" } attributes #1 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+crc32,+cx8,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="generic" } attributes #2 = { nofree nounwind } diff --git a/bench/glslang/optimized/ParseContextBase.ll b/bench/glslang/optimized/ParseContextBase.ll index 81fb8a05697..fa4c9cfbcb6 100644 --- a/bench/glslang/optimized/ParseContextBase.ll +++ b/bench/glslang/optimized/ParseContextBase.ll @@ -2776,10 +2776,10 @@ define void @_ZN7glslang17TParseContextBase20parseSwizzleSelectorERKNS_10TSource br i1 %19, label %.lr.ph, label %.preheader .lr.ph: ; preds = %16 - %20 = tail call i32 @llvm.umin.i32(i32 %18, i32 4) - %21 = getelementptr inbounds nuw i8, ptr %2, i64 8 - %22 = getelementptr inbounds nuw i8, ptr %4, i64 4 - %wide.trip.count = zext nneg i32 %20 to i64 + %20 = getelementptr inbounds nuw i8, ptr %2, i64 8 + %21 = getelementptr inbounds nuw i8, ptr %4, i64 4 + %22 = tail call i32 @llvm.umin.i32(i32 %18, i32 4) + %wide.trip.count = zext nneg i32 %22 to i64 br label %26 .preheader: ; preds = %130, %16 @@ -2794,7 +2794,7 @@ define void @_ZN7glslang17TParseContextBase20parseSwizzleSelectorERKNS_10TSource 26: ; preds = %.lr.ph, %130 %indvars.iv = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next, %130 ] - %27 = load ptr, ptr %21, align 8 + %27 = load ptr, ptr %20, align 8 %28 = getelementptr inbounds nuw i8, ptr %27, i64 %indvars.iv %29 = load i8, ptr %28, align 1 switch i8 %29, label %126 [ @@ -2821,7 +2821,7 @@ define void @_ZN7glslang17TParseContextBase20parseSwizzleSelectorERKNS_10TSource %34 = add nsw i32 %31, 1 store i32 %34, ptr %4, align 4 %35 = sext i32 %31 to i64 - %36 = getelementptr inbounds [4 x i32], ptr %22, i64 0, i64 %35 + %36 = getelementptr inbounds [4 x i32], ptr %21, i64 0, i64 %35 store i32 0, ptr %36, align 4 br label %_ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit @@ -2839,7 +2839,7 @@ _ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit: ; preds = %30, %33 %42 = add nsw i32 %39, 1 store i32 %42, ptr %4, align 4 %43 = sext i32 %39 to i64 - %44 = getelementptr inbounds [4 x i32], ptr %22, i64 0, i64 %43 + %44 = getelementptr inbounds [4 x i32], ptr %21, i64 0, i64 %43 store i32 0, ptr %44, align 4 br label %_ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit57 @@ -2857,7 +2857,7 @@ _ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit57: ; preds = %38, %41 %50 = add nsw i32 %47, 1 store i32 %50, ptr %4, align 4 %51 = sext i32 %47 to i64 - %52 = getelementptr inbounds [4 x i32], ptr %22, i64 0, i64 %51 + %52 = getelementptr inbounds [4 x i32], ptr %21, i64 0, i64 %51 store i32 0, ptr %52, align 4 br label %_ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit58 @@ -2875,7 +2875,7 @@ _ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit58: ; preds = %46, %49 %58 = add nsw i32 %55, 1 store i32 %58, ptr %4, align 4 %59 = sext i32 %55 to i64 - %60 = getelementptr inbounds [4 x i32], ptr %22, i64 0, i64 %59 + %60 = getelementptr inbounds [4 x i32], ptr %21, i64 0, i64 %59 store i32 1, ptr %60, align 4 br label %_ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit59 @@ -2893,7 +2893,7 @@ _ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit59: ; preds = %54, %57 %66 = add nsw i32 %63, 1 store i32 %66, ptr %4, align 4 %67 = sext i32 %63 to i64 - %68 = getelementptr inbounds [4 x i32], ptr %22, i64 0, i64 %67 + %68 = getelementptr inbounds [4 x i32], ptr %21, i64 0, i64 %67 store i32 1, ptr %68, align 4 br label %_ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit60 @@ -2911,7 +2911,7 @@ _ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit60: ; preds = %62, %65 %74 = add nsw i32 %71, 1 store i32 %74, ptr %4, align 4 %75 = sext i32 %71 to i64 - %76 = getelementptr inbounds [4 x i32], ptr %22, i64 0, i64 %75 + %76 = getelementptr inbounds [4 x i32], ptr %21, i64 0, i64 %75 store i32 1, ptr %76, align 4 br label %_ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit61 @@ -2929,7 +2929,7 @@ _ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit61: ; preds = %70, %73 %82 = add nsw i32 %79, 1 store i32 %82, ptr %4, align 4 %83 = sext i32 %79 to i64 - %84 = getelementptr inbounds [4 x i32], ptr %22, i64 0, i64 %83 + %84 = getelementptr inbounds [4 x i32], ptr %21, i64 0, i64 %83 store i32 2, ptr %84, align 4 br label %_ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit62 @@ -2947,7 +2947,7 @@ _ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit62: ; preds = %78, %81 %90 = add nsw i32 %87, 1 store i32 %90, ptr %4, align 4 %91 = sext i32 %87 to i64 - %92 = getelementptr inbounds [4 x i32], ptr %22, i64 0, i64 %91 + %92 = getelementptr inbounds [4 x i32], ptr %21, i64 0, i64 %91 store i32 2, ptr %92, align 4 br label %_ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit63 @@ -2965,7 +2965,7 @@ _ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit63: ; preds = %86, %89 %98 = add nsw i32 %95, 1 store i32 %98, ptr %4, align 4 %99 = sext i32 %95 to i64 - %100 = getelementptr inbounds [4 x i32], ptr %22, i64 0, i64 %99 + %100 = getelementptr inbounds [4 x i32], ptr %21, i64 0, i64 %99 store i32 2, ptr %100, align 4 br label %_ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit64 @@ -2983,7 +2983,7 @@ _ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit64: ; preds = %94, %97 %106 = add nsw i32 %103, 1 store i32 %106, ptr %4, align 4 %107 = sext i32 %103 to i64 - %108 = getelementptr inbounds [4 x i32], ptr %22, i64 0, i64 %107 + %108 = getelementptr inbounds [4 x i32], ptr %21, i64 0, i64 %107 store i32 3, ptr %108, align 4 br label %_ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit65 @@ -3001,7 +3001,7 @@ _ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit65: ; preds = %102, %105 %114 = add nsw i32 %111, 1 store i32 %114, ptr %4, align 4 %115 = sext i32 %111 to i64 - %116 = getelementptr inbounds [4 x i32], ptr %22, i64 0, i64 %115 + %116 = getelementptr inbounds [4 x i32], ptr %21, i64 0, i64 %115 store i32 3, ptr %116, align 4 br label %_ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit66 @@ -3019,7 +3019,7 @@ _ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit66: ; preds = %110, %113 %122 = add nsw i32 %119, 1 store i32 %122, ptr %4, align 4 %123 = sext i32 %119 to i64 - %124 = getelementptr inbounds [4 x i32], ptr %22, i64 0, i64 %123 + %124 = getelementptr inbounds [4 x i32], ptr %21, i64 0, i64 %123 store i32 3, ptr %124, align 4 br label %_ZN7glslang17TSwizzleSelectorsIiE9push_backEi.exit67 diff --git a/bench/gromacs/optimized/xtc2.ll b/bench/gromacs/optimized/xtc2.ll index 839d78b711e..4f2850d6aa6 100644 --- a/bench/gromacs/optimized/xtc2.ll +++ b/bench/gromacs/optimized/xtc2.ll @@ -448,12 +448,12 @@ positive_int.exit480: ; preds = %142, %145, %147 br label %167 167: ; preds = %.lr.ph747, %627 - %.0378745 = phi i32 [ %119, %.lr.ph747 ], [ %.1379, %627 ] - %.0384744 = phi i32 [ 0, %.lr.ph747 ], [ %.1385, %627 ] - %.0388743 = phi i32 [ %23, %.lr.ph747 ], [ %.2390, %627 ] - %.0393742 = phi ptr [ %1, %.lr.ph747 ], [ %.2395, %627 ] - %.0400741 = phi i32 [ 0, %.lr.ph747 ], [ %.1401, %627 ] - %.0740 = phi i32 [ 0, %.lr.ph747 ], [ %.1628, %627 ] + %.0378745 = phi i32 [ %119, %.lr.ph747 ], [ %.1379, %628 ] + %.0384744 = phi i32 [ 0, %.lr.ph747 ], [ %.1385, %628 ] + %.0388743 = phi i32 [ %23, %.lr.ph747 ], [ %.2390, %628 ] + %.0393742 = phi ptr [ %1, %.lr.ph747 ], [ %.2395, %628 ] + %.0400741 = phi i32 [ 0, %.lr.ph747 ], [ %.1401, %628 ] + %.0740 = phi i32 [ 0, %.lr.ph747 ], [ %.1628, %628 ] %168 = icmp slt i32 %.0388743, 0 br i1 %168, label %169, label %172 @@ -1507,8 +1507,8 @@ Ptngc_find_magic_index.exit558: ; preds = %507 .critedge: ; preds = %553, %.critedge %.6956 = phi i32 [ %560, %.critedge ], [ %.5, %553 ] %.4382955 = phi i32 [ %561, %.critedge ], [ %.0378745, %553 ] - %spec.store.select10 = call i32 @llvm.smin.i32(i32 %.6956, i32 2) - %spec.store.select15 = call i32 @llvm.smax.i32(i32 %spec.store.select10, i32 -2) + %spec.store.select10 = call i32 @llvm.smax.i32(i32 %.6956, i32 -2) + %spec.store.select15 = call i32 @llvm.smin.i32(i32 %spec.store.select10, i32 2) %560 = sub nsw i32 %.6956, %spec.store.select15 %561 = add nsw i32 %spec.store.select15, %.4382955 %562 = icmp slt i32 %.6956, 0 @@ -1545,8 +1545,8 @@ Ptngc_find_magic_index.exit558: ; preds = %507 br label %575 575: ; preds = %._crit_edge957, %569, %._crit_edge717, %523 - %.2402 = phi i32 [ %.0400741, %523 ], [ %.0357, %569 ], [ %.0400741, %._crit_edge957 ], [ %.0400741, %._crit_edge717 ] - %.2380 = phi i32 [ %.0378745, %523 ], [ %574, %569 ], [ %.4382.lcssa, %._crit_edge957 ], [ %.0378745, %._crit_edge717 ] + %.2402 = phi i32 [ %.0400741, %523 ], [ %.0357, %570 ], [ %.0400741, %._crit_edge957 ], [ %.0400741, %._crit_edge717 ] + %.2380 = phi i32 [ %.0378745, %523 ], [ %574, %570 ], [ %.4382.lcssa, %._crit_edge957 ], [ %.0378745, %._crit_edge717 ] %576 = load i32, ptr %17, align 4, !tbaa !3 %.not429 = icmp eq i32 %576, 0 br i1 %.not429, label %585, label %577 @@ -1640,9 +1640,9 @@ Ptngc_find_magic_index.exit558: ; preds = %507 br i1 %exitcond834.not, label %._crit_edge727, label %.lr.ph726, !llvm.loop !49 ._crit_edge727: ; preds = %.lr.ph726, %586 - %.lcssa723 = phi i32 [ %.promoted722888, %586 ], [ %623, %.lr.ph726 ] - %.lcssa721 = phi i32 [ %.promoted720890, %586 ], [ %614, %.lr.ph726 ] - %.lcssa719 = phi i32 [ %.promoted892, %586 ], [ %605, %.lr.ph726 ] + %.lcssa723 = phi i32 [ %.promoted722888, %587 ], [ %623, %.lr.ph726 ] + %.lcssa721 = phi i32 [ %.promoted720890, %587 ], [ %614, %.lr.ph726 ] + %.lcssa719 = phi i32 [ %.promoted892, %587 ], [ %605, %.lr.ph726 ] store i32 %.lcssa719, ptr %19, align 4 store i32 %.lcssa721, ptr %151, align 4 store i32 %.lcssa723, ptr %152, align 4 diff --git a/bench/grpc/optimized/compression_internal.ll b/bench/grpc/optimized/compression_internal.ll index 157f4bd96e1..53665691460 100644 --- a/bench/grpc/optimized/compression_internal.ll +++ b/bench/grpc/optimized/compression_internal.ll @@ -996,8 +996,8 @@ define void @_ZN9grpc_core33CompressionOptionsFromChannelArgsERKNS_11ChannelArgs %.sroa.015.0.extract.trunc = trunc i64 %3 to i32 %6 = getelementptr inbounds nuw i8, ptr %0, i64 4 store i32 1, ptr %6, align 4, !tbaa !79 - %..i = tail call i32 @llvm.smin.i32(i32 %.sroa.015.0.extract.trunc, i32 3) - %.0.i = tail call i32 @llvm.smax.i32(i32 %..i, i32 0) + %..i = tail call i32 @llvm.smax.i32(i32 %.sroa.015.0.extract.trunc, i32 0) + %.0.i = tail call i32 @llvm.umin.i32(i32 %..i, i32 3) %7 = getelementptr inbounds nuw i8, ptr %0, i64 8 store i32 %.0.i, ptr %7, align 4, !tbaa !85 br label %8 @@ -1012,8 +1012,8 @@ define void @_ZN9grpc_core33CompressionOptionsFromChannelArgsERKNS_11ChannelArgs %.sroa.08.0.extract.trunc = trunc i64 %9 to i32 %12 = getelementptr inbounds nuw i8, ptr %0, i64 12 store i32 1, ptr %12, align 4, !tbaa !86 - %..i3 = tail call i32 @llvm.smin.i32(i32 %.sroa.08.0.extract.trunc, i32 2) - %.0.i4 = tail call i32 @llvm.smax.i32(i32 %..i3, i32 0) + %..i3 = tail call i32 @llvm.smax.i32(i32 %.sroa.08.0.extract.trunc, i32 0) + %.0.i4 = tail call i32 @llvm.umin.i32(i32 %..i3, i32 2) %13 = getelementptr inbounds nuw i8, ptr %0, i64 16 store i32 %.0.i4, ptr %13, align 4, !tbaa !87 br label %14 @@ -1419,10 +1419,10 @@ declare i64 @llvm.umin.i64(i64, i64) #26 declare i32 @bcmp(ptr captures(none), ptr captures(none), i64) local_unnamed_addr #27 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smin.i32(i32, i32) #26 +declare i32 @llvm.smax.i32(i32, i32) #26 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smax.i32(i32, i32) #26 +declare i32 @llvm.umin.i32(i32, i32) #26 attributes #0 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } diff --git a/bench/grpc/optimized/flow_control.ll b/bench/grpc/optimized/flow_control.ll index 4ee00b7ba85..a5aa735f90d 100644 --- a/bench/grpc/optimized/flow_control.ll +++ b/bench/grpc/optimized/flow_control.ll @@ -1139,8 +1139,8 @@ define noundef range(i32 0, -2147483648) i32 @_ZNK9grpc_core6chttp220TransportFl 10: ; preds = %2 %11 = sub nsw i64 %8, %.pre - %..i = tail call i64 @llvm.smin.i64(i64 %11, i64 2147483647) - %.0.i = tail call i64 @llvm.smax.i64(i64 %..i, i64 0) + %..i = tail call i64 @llvm.smax.i64(i64 %11, i64 0) + %.0.i = tail call i64 @llvm.umin.i64(i64 %..i, i64 2147483647) %12 = trunc nuw nsw i64 %.0.i to i32 br label %13 @@ -2182,8 +2182,8 @@ define noundef range(i32 0, -2147483648) i32 @_ZNK9grpc_core6chttp217StreamFlowC %17 = phi i64 [ %.pre, %15 ], [ %.pre.i, %._crit_edge.i ], [ %12, %9 ] %.0.i = phi i64 [ %16, %15 ], [ %.pre.i, %._crit_edge.i ], [ %spec.select.i, %9 ] %18 = sub nsw i64 %.0.i, %17 - %..i = tail call i64 @llvm.smin.i64(i64 %18, i64 2147483647) - %.0.i1 = tail call i64 @llvm.smax.i64(i64 %..i, i64 0) + %..i = tail call i64 @llvm.smax.i64(i64 %18, i64 0) + %.0.i1 = tail call i64 @llvm.umin.i64(i64 %..i, i64 2147483647) %19 = trunc nuw nsw i64 %.0.i1 to i32 ret i32 %19 } diff --git a/bench/icu/optimized/csrucode.ll b/bench/icu/optimized/csrucode.ll index 70ba8b6778f..c5ce25094a2 100644 --- a/bench/icu/optimized/csrucode.ll +++ b/bench/icu/optimized/csrucode.ll @@ -85,8 +85,8 @@ define noundef signext range(i8 0, 2) i8 @_ZNK6icu_7722CharsetRecog_UTF_16_BE5ma br i1 %10, label %.lr.ph, label %.thread .lr.ph: ; preds = %3, %29 - %indvars.iv = phi i64 [ %indvars.iv.next, %29 ], [ 0, %3 ] - %.036 = phi i32 [ %.1.i, %29 ], [ 10, %3 ] + %indvars.iv = phi i64 [ %indvars.iv.next, %31 ], [ 0, %3 ] + %.036 = phi i32 [ %.1.i, %31 ], [ 10, %3 ] %11 = getelementptr inbounds nuw i8, ptr %5, i64 %indvars.iv %12 = load i8, ptr %11, align 1, !tbaa !12 %13 = zext i8 %12 to i16 @@ -119,9 +119,9 @@ define noundef signext range(i8 0, 2) i8 @_ZNK6icu_7722CharsetRecog_UTF_16_BE5ma _ZN6icu_77L16adjustConfidenceEDsi.exit: ; preds = %23, %25 %.0.i = phi i32 [ %24, %23 ], [ %spec.select.i, %25 ] - %spec.store.select.i = tail call i32 @llvm.smin.i32(i32 %.0.i, i32 100) - %.1.i = tail call noundef range(i32 0, 101) i32 @llvm.smax.i32(i32 %spec.store.select.i, i32 0) - switch i32 %.1.i, label %29 [ + %spec.store.select.i = tail call i32 @llvm.smax.i32(i32 %.0.i, i32 0) + %.1.i = tail call noundef range(i32 0, 101) i32 @llvm.umin.i32(i32 %spec.store.select.i, i32 100) + switch i32 %.1.i, label %31 [ i32 100, label %.thread i32 0, label %.thread ] @@ -133,7 +133,7 @@ _ZN6icu_77L16adjustConfidenceEDsi.exit: ; preds = %23, %25 br i1 %31, label %.lr.ph, label %.thread, !llvm.loop !13 .thread: ; preds = %29, %.lr.ph, %_ZN6icu_77L16adjustConfidenceEDsi.exit, %_ZN6icu_77L16adjustConfidenceEDsi.exit, %3 - %.1 = phi i32 [ 10, %3 ], [ %.1.i, %_ZN6icu_77L16adjustConfidenceEDsi.exit ], [ %.1.i, %_ZN6icu_77L16adjustConfidenceEDsi.exit ], [ 100, %.lr.ph ], [ %.1.i, %29 ] + %.1 = phi i32 [ 10, %3 ], [ %.1.i, %_ZN6icu_77L16adjustConfidenceEDsi.exit ], [ %.1.i, %_ZN6icu_77L16adjustConfidenceEDsi.exit ], [ 100, %.lr.ph ], [ %.1.i, %31 ] %32 = icmp slt i32 %7, 4 %33 = icmp samesign ult i32 %.1, 100 %or.cond6 = and i1 %32, %33 @@ -170,8 +170,8 @@ define noundef signext range(i8 0, 2) i8 @_ZNK6icu_7722CharsetRecog_UTF_16_LE5ma br i1 %10, label %.lr.ph, label %.thread .lr.ph: ; preds = %3, %39 - %indvars.iv = phi i64 [ %indvars.iv.next, %39 ], [ 0, %3 ] - %.039 = phi i32 [ %.1.i, %39 ], [ 10, %3 ] + %indvars.iv = phi i64 [ %indvars.iv.next, %41 ], [ 0, %3 ] + %.039 = phi i32 [ %.1.i, %41 ], [ 10, %3 ] %11 = getelementptr inbounds nuw i8, ptr %5, i64 %indvars.iv %12 = load i8, ptr %11, align 1, !tbaa !12 %13 = zext i8 %12 to i16 @@ -221,9 +221,9 @@ define noundef signext range(i8 0, 2) i8 @_ZNK6icu_7722CharsetRecog_UTF_16_LE5ma _ZN6icu_77L16adjustConfidenceEDsi.exit: ; preds = %33, %35 %.0.i = phi i32 [ %34, %33 ], [ %spec.select.i, %35 ] - %spec.store.select.i = tail call i32 @llvm.smin.i32(i32 %.0.i, i32 100) - %.1.i = tail call noundef range(i32 0, 101) i32 @llvm.smax.i32(i32 %spec.store.select.i, i32 0) - switch i32 %.1.i, label %39 [ + %spec.store.select.i = tail call i32 @llvm.smax.i32(i32 %.0.i, i32 0) + %.1.i = tail call noundef range(i32 0, 101) i32 @llvm.umin.i32(i32 %spec.store.select.i, i32 100) + switch i32 %.1.i, label %41 [ i32 100, label %.thread i32 0, label %.thread ] @@ -235,7 +235,7 @@ _ZN6icu_77L16adjustConfidenceEDsi.exit: ; preds = %33, %35 br i1 %41, label %.lr.ph, label %.thread, !llvm.loop !15 .thread: ; preds = %39, %_ZN6icu_77L16adjustConfidenceEDsi.exit, %_ZN6icu_77L16adjustConfidenceEDsi.exit, %3, %21, %23, %27 - %.1 = phi i32 [ %spec.select, %27 ], [ 100, %21 ], [ 100, %23 ], [ 10, %3 ], [ %.1.i, %_ZN6icu_77L16adjustConfidenceEDsi.exit ], [ %.1.i, %_ZN6icu_77L16adjustConfidenceEDsi.exit ], [ %.1.i, %39 ] + %.1 = phi i32 [ %spec.select, %27 ], [ 100, %21 ], [ 100, %23 ], [ 10, %3 ], [ %.1.i, %_ZN6icu_77L16adjustConfidenceEDsi.exit ], [ %.1.i, %_ZN6icu_77L16adjustConfidenceEDsi.exit ], [ %.1.i, %41 ] %42 = icmp slt i32 %7, 4 %43 = icmp samesign ult i32 %.1, 100 %or.cond6 = and i1 %42, %43 @@ -392,6 +392,9 @@ declare i32 @llvm.smin.i32(i32, i32) #8 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #8 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #8 + attributes #0 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { mustprogress nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { cold mustprogress noreturn nounwind memory(inaccessiblemem: write) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } diff --git a/bench/image-rs/optimized/254ue5dpb10tdnze.ll b/bench/image-rs/optimized/254ue5dpb10tdnze.ll index 56c900a74c6..47a49b552d0 100644 --- a/bench/image-rs/optimized/254ue5dpb10tdnze.ll +++ b/bench/image-rs/optimized/254ue5dpb10tdnze.ll @@ -15575,8 +15575,8 @@ define hidden void @_ZN5image8imageops8colorops8brighten17h35be5e44376220d5E(ptr %26 = add nuw i32 %23, 1 %27 = zext i8 %.sroa.0.0.copyload.i.i to i32 %28 = add i32 %2, %27 - %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i = tail call i32 @llvm.smin.i32(i32 %28, i32 255) - %.0.in.sroa.speculated.i.i.i.i = tail call i32 @llvm.smax.i32(i32 %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i, i32 0) + %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i = tail call i32 @llvm.smax.i32(i32 %28, i32 0) + %.0.in.sroa.speculated.i.i.i.i = tail call i32 @llvm.umin.i32(i32 %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i, i32 255) %29 = trunc nuw i32 %.0.in.sroa.speculated.i.i.i.i to i8 %30 = icmp uge i32 %23, %10 %31 = icmp uge i32 %.sroa.8.138, %12 @@ -15596,11 +15596,11 @@ define hidden void @_ZN5image8imageops8colorops8brighten17h35be5e44376220d5E(ptr store i32 %12, ptr %34, align 4, !noalias !1668 store ptr %5, ptr %6, align 8, !noalias !1668 %35 = getelementptr inbounds nuw i8, ptr %6, i64 8 - store ptr @"_ZN50_$LT$$LP$U$C$T$RP$$u20$as$u20$core..fmt..Debug$GT$3fmt17h297b96a1d93efb91E", ptr %35, align 8, !noalias !1668 - %36 = getelementptr inbounds nuw i8, ptr %6, i64 16 - store ptr %4, ptr %36, align 8, !noalias !1668 - %37 = getelementptr inbounds nuw i8, ptr %6, i64 24 store ptr @"_ZN50_$LT$$LP$U$C$T$RP$$u20$as$u20$core..fmt..Debug$GT$3fmt17h297b96a1d93efb91E", ptr %37, align 8, !noalias !1668 + %38 = getelementptr inbounds nuw i8, ptr %6, i64 16 + store ptr %4, ptr %38, align 8, !noalias !1668 + %39 = getelementptr inbounds nuw i8, ptr %6, i64 24 + store ptr @"_ZN50_$LT$$LP$U$C$T$RP$$u20$as$u20$core..fmt..Debug$GT$3fmt17h297b96a1d93efb91E", ptr %39, align 8, !noalias !1668 store ptr @anon.f0c04611e5e2b4be9a0dda14dd5d7c38.45, ptr %7, align 8, !alias.scope !1672, !noalias !1675 %38 = getelementptr inbounds nuw i8, ptr %7, i64 8 store i64 2, ptr %38, align 8, !alias.scope !1672, !noalias !1675 @@ -15723,8 +15723,8 @@ define hidden void @_ZN5image8imageops8colorops8brighten17h48e73d165d4f9ba6E(ptr %27 = add nuw i32 %24, 1 %28 = zext i16 %.sroa.0.0.copyload.i.i to i32 %29 = add i32 %2, %28 - %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i = tail call i32 @llvm.smin.i32(i32 %29, i32 65535) - %.0.in.sroa.speculated.i.i.i.i = tail call i32 @llvm.smax.i32(i32 %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i, i32 0) + %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i = tail call i32 @llvm.smax.i32(i32 %29, i32 0) + %.0.in.sroa.speculated.i.i.i.i = tail call i32 @llvm.umin.i32(i32 %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i, i32 65535) %30 = trunc nuw i32 %.0.in.sroa.speculated.i.i.i.i to i16 tail call void @llvm.experimental.noalias.scope.decl(metadata !1687) %31 = icmp uge i32 %24, %10 @@ -15745,11 +15745,11 @@ define hidden void @_ZN5image8imageops8colorops8brighten17h48e73d165d4f9ba6E(ptr store i32 %12, ptr %35, align 4, !noalias !1690 store ptr %5, ptr %6, align 8, !noalias !1690 %36 = getelementptr inbounds nuw i8, ptr %6, i64 8 - store ptr @"_ZN50_$LT$$LP$U$C$T$RP$$u20$as$u20$core..fmt..Debug$GT$3fmt17h297b96a1d93efb91E", ptr %36, align 8, !noalias !1690 - %37 = getelementptr inbounds nuw i8, ptr %6, i64 16 - store ptr %4, ptr %37, align 8, !noalias !1690 - %38 = getelementptr inbounds nuw i8, ptr %6, i64 24 store ptr @"_ZN50_$LT$$LP$U$C$T$RP$$u20$as$u20$core..fmt..Debug$GT$3fmt17h297b96a1d93efb91E", ptr %38, align 8, !noalias !1690 + %39 = getelementptr inbounds nuw i8, ptr %6, i64 16 + store ptr %4, ptr %39, align 8, !noalias !1690 + %40 = getelementptr inbounds nuw i8, ptr %6, i64 24 + store ptr @"_ZN50_$LT$$LP$U$C$T$RP$$u20$as$u20$core..fmt..Debug$GT$3fmt17h297b96a1d93efb91E", ptr %40, align 8, !noalias !1690 store ptr @anon.f0c04611e5e2b4be9a0dda14dd5d7c38.45, ptr %7, align 8, !alias.scope !1692, !noalias !1695 %39 = getelementptr inbounds nuw i8, ptr %7, i64 8 store i64 2, ptr %39, align 8, !alias.scope !1692, !noalias !1695 @@ -16383,7 +16383,7 @@ define hidden void @_ZN5image8imageops8colorops8brighten17h75f48f95ff6b6f5bE(ptr 15: ; preds = %.loopexit.split-lp, %.loopexit %lpad.phi = phi { ptr, i32 } [ %lpad.loopexit, %.loopexit ], [ %lpad.loopexit.split-lp, %.loopexit.split-lp ] invoke void @"_ZN4core3ptr113drop_in_place$LT$image..buffer_..ImageBuffer$LT$image..color..LumaA$LT$u8$GT$$C$alloc..vec..Vec$LT$u8$GT$$GT$$GT$17ha1a53ec1e6e57852E"(ptr noalias noundef nonnull align 8 dereferenceable(32) %8) #19 - to label %59 unwind label %57 + to label %61 unwind label %57 16: ; preds = %3 %.fca.1.extract.i = extractvalue { i64, i64 } %13, 1 @@ -16425,8 +16425,8 @@ define hidden void @_ZN5image8imageops8colorops8brighten17h75f48f95ff6b6f5bE(ptr %27 = and i16 %.sroa.0.0.copyload.i.i, 255 %28 = zext nneg i16 %27 to i32 %29 = add i32 %2, %28 - %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i = tail call i32 @llvm.smin.i32(i32 %29, i32 255) - %.0.in.sroa.speculated.i.i.i.i = tail call i32 @llvm.smax.i32(i32 %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i, i32 0) + %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i = tail call i32 @llvm.smax.i32(i32 %29, i32 0) + %.0.in.sroa.speculated.i.i.i.i = tail call i32 @llvm.umin.i32(i32 %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i, i32 255) %.sroa.0.0.insert.ext.i = trunc nuw nsw i32 %.0.in.sroa.speculated.i.i.i.i to i16 %.sroa.0.0.insert.mask.i = and i16 %.sroa.0.0.copyload.i.i, -256 %.sroa.0.0.insert.insert.i = or disjoint i16 %.sroa.0.0.insert.mask.i, %.sroa.0.0.insert.ext.i @@ -16448,11 +16448,11 @@ define hidden void @_ZN5image8imageops8colorops8brighten17h75f48f95ff6b6f5bE(ptr store i32 %12, ptr %34, align 4, !noalias !1810 store ptr %5, ptr %6, align 8, !noalias !1810 %35 = getelementptr inbounds nuw i8, ptr %6, i64 8 - store ptr @"_ZN50_$LT$$LP$U$C$T$RP$$u20$as$u20$core..fmt..Debug$GT$3fmt17h297b96a1d93efb91E", ptr %35, align 8, !noalias !1810 - %36 = getelementptr inbounds nuw i8, ptr %6, i64 16 - store ptr %4, ptr %36, align 8, !noalias !1810 - %37 = getelementptr inbounds nuw i8, ptr %6, i64 24 store ptr @"_ZN50_$LT$$LP$U$C$T$RP$$u20$as$u20$core..fmt..Debug$GT$3fmt17h297b96a1d93efb91E", ptr %37, align 8, !noalias !1810 + %38 = getelementptr inbounds nuw i8, ptr %6, i64 16 + store ptr %4, ptr %38, align 8, !noalias !1810 + %39 = getelementptr inbounds nuw i8, ptr %6, i64 24 + store ptr @"_ZN50_$LT$$LP$U$C$T$RP$$u20$as$u20$core..fmt..Debug$GT$3fmt17h297b96a1d93efb91E", ptr %39, align 8, !noalias !1810 store ptr @anon.f0c04611e5e2b4be9a0dda14dd5d7c38.45, ptr %7, align 8, !alias.scope !1814, !noalias !1817 %38 = getelementptr inbounds nuw i8, ptr %7, i64 8 store i64 2, ptr %38, align 8, !alias.scope !1814, !noalias !1817 @@ -16551,7 +16551,7 @@ define hidden void @_ZN5image8imageops8colorops8brighten17hdbae2855b93cb8bbE(ptr 15: ; preds = %.loopexit.split-lp, %.loopexit %lpad.phi = phi { ptr, i32 } [ %lpad.loopexit, %.loopexit ], [ %lpad.loopexit.split-lp, %.loopexit.split-lp ] invoke void @"_ZN4core3ptr115drop_in_place$LT$image..buffer_..ImageBuffer$LT$image..color..LumaA$LT$u16$GT$$C$alloc..vec..Vec$LT$u16$GT$$GT$$GT$17h11586b09306c37f2E"(ptr noalias noundef nonnull align 8 dereferenceable(32) %8) #19 - to label %59 unwind label %57 + to label %61 unwind label %57 16: ; preds = %3 %.fca.1.extract.i = extractvalue { i64, i64 } %13, 1 @@ -16587,8 +16587,8 @@ define hidden void @_ZN5image8imageops8colorops8brighten17hdbae2855b93cb8bbE(ptr %27 = add nuw i32 %24, 1 %28 = and i32 %.sroa.0.0.copyload.i.i, 65535 %29 = add i32 %28, %2 - %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i = tail call i32 @llvm.smin.i32(i32 %29, i32 65535) - %.0.in.sroa.speculated.i.i.i.i = tail call i32 @llvm.smax.i32(i32 %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i, i32 0) + %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i = tail call i32 @llvm.smax.i32(i32 %29, i32 0) + %.0.in.sroa.speculated.i.i.i.i = tail call i32 @llvm.umin.i32(i32 %.0.in.sroa.speculate.load.3.sroa.speculated.i.i.i.i, i32 65535) %.sroa.0.0.insert.mask.i = and i32 %.sroa.0.0.copyload.i.i, -65536 %.sroa.0.0.insert.insert.i = or disjoint i32 %.0.in.sroa.speculated.i.i.i.i, %.sroa.0.0.insert.mask.i tail call void @llvm.experimental.noalias.scope.decl(metadata !1829) @@ -16610,11 +16610,11 @@ define hidden void @_ZN5image8imageops8colorops8brighten17hdbae2855b93cb8bbE(ptr store i32 %12, ptr %34, align 4, !noalias !1832 store ptr %5, ptr %6, align 8, !noalias !1832 %35 = getelementptr inbounds nuw i8, ptr %6, i64 8 - store ptr @"_ZN50_$LT$$LP$U$C$T$RP$$u20$as$u20$core..fmt..Debug$GT$3fmt17h297b96a1d93efb91E", ptr %35, align 8, !noalias !1832 - %36 = getelementptr inbounds nuw i8, ptr %6, i64 16 - store ptr %4, ptr %36, align 8, !noalias !1832 - %37 = getelementptr inbounds nuw i8, ptr %6, i64 24 store ptr @"_ZN50_$LT$$LP$U$C$T$RP$$u20$as$u20$core..fmt..Debug$GT$3fmt17h297b96a1d93efb91E", ptr %37, align 8, !noalias !1832 + %38 = getelementptr inbounds nuw i8, ptr %6, i64 16 + store ptr %4, ptr %38, align 8, !noalias !1832 + %39 = getelementptr inbounds nuw i8, ptr %6, i64 24 + store ptr @"_ZN50_$LT$$LP$U$C$T$RP$$u20$as$u20$core..fmt..Debug$GT$3fmt17h297b96a1d93efb91E", ptr %39, align 8, !noalias !1832 store ptr @anon.f0c04611e5e2b4be9a0dda14dd5d7c38.45, ptr %7, align 8, !alias.scope !1834, !noalias !1837 %38 = getelementptr inbounds nuw i8, ptr %7, i64 8 store i64 2, ptr %38, align 8, !alias.scope !1834, !noalias !1837 diff --git a/bench/libquic/optimized/hybrid_slow_start.ll b/bench/libquic/optimized/hybrid_slow_start.ll index d9cf14b2982..ecc89d94b02 100644 --- a/bench/libquic/optimized/hybrid_slow_start.ll +++ b/bench/libquic/optimized/hybrid_slow_start.ll @@ -118,8 +118,8 @@ define noundef zeroext i1 @_ZN3net15HybridSlowStart19ShouldExitSlowStartENS_8Qui 32: ; preds = %30 %33 = ashr i64 %4, 3 - %.sroa.speculated18 = tail call i64 @llvm.smin.i64(i64 %33, i64 16000) - %.sroa.speculated = tail call i64 @llvm.smax.i64(i64 %.sroa.speculated18, i64 4000) + %.sroa.speculated18 = tail call i64 @llvm.smax.i64(i64 %33, i64 4000) + %.sroa.speculated = tail call i64 @llvm.umin.i64(i64 %.sroa.speculated18, i64 16000) %34 = add nsw i64 %.sroa.speculated, %4 %35 = icmp slt i64 %34, %.sroa.26.0.copyload br i1 %35, label %36, label %.thread @@ -129,7 +129,7 @@ define noundef zeroext i1 @_ZN3net15HybridSlowStart19ShouldExitSlowStartENS_8Qui br label %.thread .thread: ; preds = %18, %32, %36, %30 - %37 = phi i1 [ false, %18 ], [ false, %32 ], [ true, %36 ], [ false, %30 ] + %37 = phi i1 [ false, %18 ], [ false, %32 ], [ true, %38 ], [ false, %30 ] %38 = icmp ugt i64 %5, 15 %39 = and i1 %38, %37 br label %40 @@ -143,10 +143,10 @@ define noundef zeroext i1 @_ZN3net15HybridSlowStart19ShouldExitSlowStartENS_8Qui declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) #3 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i64 @llvm.smin.i64(i64, i64) #4 +declare i64 @llvm.smax.i64(i64, i64) #4 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i64 @llvm.smax.i64(i64, i64) #4 +declare i64 @llvm.umin.i64(i64, i64) #4 attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } diff --git a/bench/libwebp/optimized/analysis_enc.ll b/bench/libwebp/optimized/analysis_enc.ll index 554dad57438..c34ea798001 100644 --- a/bench/libwebp/optimized/analysis_enc.ll +++ b/bench/libwebp/optimized/analysis_enc.ll @@ -715,13 +715,13 @@ SmoothSegmentMap.exit.i: ; preds = %._crit_edge70.i.i, %367 = sub nsw i32 %363, %.060.i.i %368 = mul nsw i32 %367, 255 %369 = sdiv i32 %368, %359 - %370 = call i32 @llvm.smin.i32(i32 %366, i32 127) - %371 = call i32 @llvm.smax.i32(i32 %370, i32 -127) + %370 = call i32 @llvm.smax.i32(i32 %366, i32 -127) + %371 = call i32 @llvm.smin.i32(i32 %370, i32 127) %372 = getelementptr inbounds nuw [4 x %struct.VP8SegmentInfo], ptr %360, i64 0, i64 %indvars.iv52.i.i %373 = getelementptr inbounds nuw i8, ptr %372, i64 672 store i32 %371, ptr %373, align 8, !tbaa !74 - %374 = call i32 @llvm.smin.i32(i32 %369, i32 255) - %375 = call i32 @llvm.smax.i32(i32 %374, i32 0) + %374 = call i32 @llvm.smax.i32(i32 %369, i32 0) + %375 = call i32 @llvm.umin.i32(i32 %374, i32 255) %376 = getelementptr inbounds nuw i8, ptr %372, i64 676 store i32 %375, ptr %376, align 4, !tbaa !77 %indvars.iv.next53.i.i = add nuw nsw i64 %indvars.iv52.i.i, 1 diff --git a/bench/libwebp/optimized/frame_dec.ll b/bench/libwebp/optimized/frame_dec.ll index b4786f3f998..f058cd93a45 100644 --- a/bench/libwebp/optimized/frame_dec.ll +++ b/bench/libwebp/optimized/frame_dec.ll @@ -1357,8 +1357,8 @@ define hidden i32 @VP8EnterCritical(ptr noundef %0, ptr noundef %1) local_unname .split.us.us.i: ; preds = %80, %77, %72 %.048.us.i = phi i32 [ %75, %72 ], [ %79, %77 ], [ %81, %80 ] - %82 = tail call i32 @llvm.smin.i32(i32 %.048.us.i, i32 63) - %83 = tail call i32 @llvm.smax.i32(i32 %82, i32 0) + %82 = tail call i32 @llvm.smax.i32(i32 %.048.us.i, i32 0) + %83 = tail call i32 @llvm.umin.i32(i32 %82, i32 63) %.not56.us.us.i = icmp slt i32 %.048.us.i, 1 %84 = shl nuw nsw i32 %83, 1 %85 = icmp samesign ugt i32 %.048.us.i, 39 @@ -1392,9 +1392,9 @@ define hidden i32 @VP8EnterCritical(ptr noundef %0, ptr noundef %1) local_unname br label %.split59.us.us.i .split.us.split.split.us65.i: ; preds = %.split.us.split.us64.i - %spec.store.select.us.us66.i = tail call i32 @llvm.smax.i32(i32 %82, i32 1) + %spec.store.select.us.us66.i = tail call i32 @llvm.umax.i32(i32 %83, i32 1) %96 = trunc nuw nsw i32 %spec.store.select.us.us66.i to i8 - %97 = add nuw nsw i32 %84, %spec.store.select.us.us66.i + %97 = add nuw nsw i32 %spec.store.select.us.us66.i, %84 %98 = trunc nuw i32 %97 to i8 %99 = getelementptr inbounds nuw [4 x [2 x %struct.VP8FInfo]], ptr %64, i64 0, i64 %indvars.iv84.i, i64 0 %100 = getelementptr inbounds nuw i8, ptr %99, i64 1 @@ -1457,8 +1457,8 @@ define hidden i32 @VP8EnterCritical(ptr noundef %0, ptr noundef %1) local_unname br label %124 124: ; preds = %150, %.split.i.us - %.not55.i.us = phi i1 [ true, %.split.i.us ], [ false, %150 ] - %indvars.iv.i.us = phi i64 [ 0, %.split.i.us ], [ 1, %150 ] + %.not55.i.us = phi i1 [ true, %.split.i.us ], [ false, %151 ] + %indvars.iv.i.us = phi i64 [ 0, %.split.i.us ], [ 1, %151 ] %125 = getelementptr inbounds nuw [4 x [2 x %struct.VP8FInfo]], ptr %64, i64 0, i64 %indvars.iv72.i.us, i64 %indvars.iv.i.us br i1 %.not55.i.us, label %129, label %126 @@ -1468,9 +1468,9 @@ define hidden i32 @VP8EnterCritical(ptr noundef %0, ptr noundef %1) local_unname br label %129 129: ; preds = %126, %124 - %.046.i.us = phi i32 [ %128, %126 ], [ %123, %124 ] - %130 = tail call i32 @llvm.smin.i32(i32 %.046.i.us, i32 63) - %131 = tail call i32 @llvm.smax.i32(i32 %130, i32 0) + %.046.i.us = phi i32 [ %128, %127 ], [ %123, %125 ] + %130 = tail call i32 @llvm.smax.i32(i32 %.046.i.us, i32 0) + %131 = tail call i32 @llvm.umin.i32(i32 %130, i32 63) %.not56.i.us = icmp slt i32 %.046.i.us, 1 br i1 %.not56.i.us, label %149, label %132 @@ -1488,7 +1488,7 @@ define hidden i32 @VP8EnterCritical(ptr noundef %0, ptr noundef %1) local_unname br label %138 138: ; preds = %135, %132 - %.0.i.us = phi i32 [ %131, %132 ], [ %spec.select.i.us, %135 ] + %.0.i.us = phi i32 [ %131, %133 ], [ %spec.select.i.us, %136 ] %spec.store.select.i.us = tail call i32 @llvm.smax.i32(i32 %.0.i.us, i32 1) %139 = trunc nuw nsw i32 %spec.store.select.i.us to i8 %140 = getelementptr inbounds nuw i8, ptr %125, i64 1 @@ -1538,13 +1538,13 @@ define hidden i32 @VP8EnterCritical(ptr noundef %0, ptr noundef %1) local_unname br label %.split.i .split.i: ; preds = %158, %154 - %.048.i = phi i32 [ %157, %154 ], [ %160, %158 ] + %.048.i = phi i32 [ %157, %155 ], [ %160, %159 ] %161 = add nsw i32 %.048.i, %121 br label %162 162: ; preds = %188, %.split.i - %.not55.i = phi i1 [ true, %.split.i ], [ false, %188 ] - %indvars.iv.i = phi i64 [ 0, %.split.i ], [ 1, %188 ] + %.not55.i = phi i1 [ true, %.split.i ], [ false, %189 ] + %indvars.iv.i = phi i64 [ 0, %.split.i ], [ 1, %189 ] %163 = getelementptr inbounds nuw [4 x [2 x %struct.VP8FInfo]], ptr %64, i64 0, i64 %indvars.iv72.i, i64 %indvars.iv.i br i1 %.not55.i, label %167, label %164 @@ -1554,9 +1554,9 @@ define hidden i32 @VP8EnterCritical(ptr noundef %0, ptr noundef %1) local_unname br label %167 167: ; preds = %164, %162 - %.046.i = phi i32 [ %166, %164 ], [ %161, %162 ] - %168 = tail call i32 @llvm.smin.i32(i32 %.046.i, i32 63) - %169 = tail call i32 @llvm.smax.i32(i32 %168, i32 0) + %.046.i = phi i32 [ %166, %165 ], [ %161, %163 ] + %168 = tail call i32 @llvm.smax.i32(i32 %.046.i, i32 0) + %169 = tail call i32 @llvm.umin.i32(i32 %168, i32 63) %.not56.i = icmp slt i32 %.046.i, 1 br i1 %.not56.i, label %187, label %170 @@ -1574,7 +1574,7 @@ define hidden i32 @VP8EnterCritical(ptr noundef %0, ptr noundef %1) local_unname br label %176 176: ; preds = %173, %170 - %.0.i = phi i32 [ %169, %170 ], [ %spec.select.i, %173 ] + %.0.i = phi i32 [ %169, %171 ], [ %spec.select.i, %174 ] %spec.store.select.i = tail call i32 @llvm.smax.i32(i32 %.0.i, i32 1) %177 = trunc nuw nsw i32 %spec.store.select.i to i8 %178 = getelementptr inbounds nuw i8, ptr %163, i64 1 @@ -1969,6 +1969,12 @@ declare i32 @llvm.smin.i32(i32, i32) #6 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #6 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #6 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umax.i32(i32, i32) #6 + attributes #0 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } diff --git a/bench/libwebp/optimized/quant_enc.ll b/bench/libwebp/optimized/quant_enc.ll index f789dce98a7..48fec55c8d8 100644 --- a/bench/libwebp/optimized/quant_enc.ll +++ b/bench/libwebp/optimized/quant_enc.ll @@ -124,8 +124,8 @@ QualityToJPEGCompression.exit: ; preds = %16, %22 %50 = fsub double 1.000000e+00, %49 %51 = fmul double %50, 1.270000e+02 %52 = fptosi double %51 to i32 - %53 = tail call i32 @llvm.smin.i32(i32 %52, i32 127) - %54 = tail call i32 @llvm.smax.i32(i32 %53, i32 0) + %53 = tail call i32 @llvm.smax.i32(i32 %52, i32 0) + %54 = tail call i32 @llvm.umin.i32(i32 %53, i32 127) %55 = getelementptr inbounds nuw i8, ptr %44, i64 680 store i32 %54, ptr %55, align 8, !tbaa !31 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 @@ -165,11 +165,11 @@ QualityToJPEGCompression.exit: ; preds = %16, %22 %70 = load i32, ptr %7, align 4, !tbaa !25 %71 = mul nsw i32 %69, %70 %72 = sdiv i32 %71, 100 - %73 = tail call i32 @llvm.smin.i32(i32 %72, i32 6) - %74 = tail call i32 @llvm.smax.i32(i32 %73, i32 -4) + %73 = tail call i32 @llvm.smax.i32(i32 %72, i32 -4) + %74 = tail call i32 @llvm.smin.i32(i32 %73, i32 6) %75 = sdiv i32 %70, -25 - %76 = tail call i32 @llvm.smin.i32(i32 %75, i32 15) - %77 = tail call i32 @llvm.smax.i32(i32 %76, i32 -15) + %76 = tail call i32 @llvm.smax.i32(i32 %75, i32 -15) + %77 = tail call i32 @llvm.smin.i32(i32 %76, i32 15) %78 = getelementptr inbounds nuw i8, ptr %0, i64 3596 store i32 0, ptr %78, align 4, !tbaa !40 %79 = getelementptr inbounds nuw i8, ptr %0, i64 3600 @@ -192,8 +192,8 @@ QualityToJPEGCompression.exit: ; preds = %16, %22 %89 = getelementptr inbounds nuw [4 x %struct.VP8SegmentInfo], ptr %86, i64 0, i64 %indvars.iv.i %90 = getelementptr inbounds nuw i8, ptr %89, i64 680 %91 = load i32, ptr %90, align 8, !tbaa !31 - %92 = tail call i32 @llvm.smin.i32(i32 %91, i32 127) - %93 = tail call i32 @llvm.smax.i32(i32 %92, i32 0) + %92 = tail call i32 @llvm.smax.i32(i32 %91, i32 0) + %93 = tail call i32 @llvm.umin.i32(i32 %92, i32 127) %94 = zext nneg i32 %93 to i64 %95 = getelementptr inbounds nuw [128 x i16], ptr @kAcTable, i64 0, i64 %94 %96 = load i16, ptr %95, align 2, !tbaa !46 @@ -402,15 +402,15 @@ SimplifySegments.exit: ; preds = %177, %._crit_edge.i %194 = load i32, ptr %193, align 8, !tbaa !31 %195 = load i32, ptr %78, align 4, !tbaa !40 %196 = add nsw i32 %195, %194 - %197 = tail call i32 @llvm.smin.i32(i32 %196, i32 127) - %198 = tail call i32 @llvm.smax.i32(i32 %197, i32 0) + %197 = tail call i32 @llvm.smax.i32(i32 %196, i32 0) + %198 = tail call i32 @llvm.umin.i32(i32 %197, i32 127) %199 = zext nneg i32 %198 to i64 %200 = getelementptr inbounds nuw [128 x i8], ptr @kDcTable, i64 0, i64 %199 %201 = load i8, ptr %200, align 1, !tbaa !58 %202 = zext i8 %201 to i16 store i16 %202, ptr %192, align 8, !tbaa !46 - %203 = tail call i32 @llvm.smin.i32(i32 %194, i32 127) - %204 = tail call i32 @llvm.smax.i32(i32 %203, i32 0) + %203 = tail call i32 @llvm.smax.i32(i32 %194, i32 0) + %204 = tail call i32 @llvm.umin.i32(i32 %203, i32 127) %205 = zext nneg i32 %204 to i64 %206 = getelementptr inbounds nuw [128 x i16], ptr @kAcTable, i64 0, i64 %205 %207 = load i16, ptr %206, align 2, !tbaa !46 @@ -418,8 +418,8 @@ SimplifySegments.exit: ; preds = %177, %._crit_edge.i store i16 %207, ptr %208, align 2, !tbaa !46 %209 = load i32, ptr %79, align 8, !tbaa !41 %210 = add nsw i32 %209, %194 - %211 = tail call i32 @llvm.smin.i32(i32 %210, i32 127) - %212 = tail call i32 @llvm.smax.i32(i32 %211, i32 0) + %211 = tail call i32 @llvm.smax.i32(i32 %210, i32 0) + %212 = tail call i32 @llvm.umin.i32(i32 %211, i32 127) %213 = zext nneg i32 %212 to i64 %214 = getelementptr inbounds nuw [128 x i8], ptr @kDcTable, i64 0, i64 %213 %215 = load i8, ptr %214, align 1, !tbaa !58 @@ -429,8 +429,8 @@ SimplifySegments.exit: ; preds = %177, %._crit_edge.i store i16 %217, ptr %218, align 8, !tbaa !46 %219 = load i32, ptr %80, align 4, !tbaa !42 %220 = add nsw i32 %219, %194 - %221 = tail call i32 @llvm.smin.i32(i32 %220, i32 127) - %222 = tail call i32 @llvm.smax.i32(i32 %221, i32 0) + %221 = tail call i32 @llvm.smax.i32(i32 %220, i32 0) + %222 = tail call i32 @llvm.umin.i32(i32 %221, i32 127) %223 = zext nneg i32 %222 to i64 %224 = getelementptr inbounds nuw [128 x i16], ptr @kAcTable2, i64 0, i64 %223 %225 = load i16, ptr %224, align 2, !tbaa !46 @@ -438,8 +438,8 @@ SimplifySegments.exit: ; preds = %177, %._crit_edge.i store i16 %225, ptr %226, align 2, !tbaa !46 %227 = load i32, ptr %81, align 8, !tbaa !43 %228 = add nsw i32 %227, %194 - %229 = tail call i32 @llvm.smin.i32(i32 %228, i32 117) - %230 = tail call i32 @llvm.smax.i32(i32 %229, i32 0) + %229 = tail call i32 @llvm.smax.i32(i32 %228, i32 0) + %230 = tail call i32 @llvm.umin.i32(i32 %229, i32 117) %231 = zext nneg i32 %230 to i64 %232 = getelementptr inbounds nuw [128 x i8], ptr @kDcTable, i64 0, i64 %231 %233 = load i8, ptr %232, align 1, !tbaa !58 @@ -448,8 +448,8 @@ SimplifySegments.exit: ; preds = %177, %._crit_edge.i store i16 %234, ptr %235, align 8, !tbaa !46 %236 = load i32, ptr %82, align 4, !tbaa !44 %237 = add nsw i32 %236, %194 - %238 = tail call i32 @llvm.smin.i32(i32 %237, i32 127) - %239 = tail call i32 @llvm.smax.i32(i32 %238, i32 0) + %238 = tail call i32 @llvm.smax.i32(i32 %237, i32 0) + %239 = tail call i32 @llvm.umin.i32(i32 %238, i32 127) %240 = zext nneg i32 %239 to i64 %241 = getelementptr inbounds nuw [128 x i16], ptr @kAcTable, i64 0, i64 %240 %242 = load i16, ptr %241, align 2, !tbaa !46 diff --git a/bench/linux/optimized/ntp.ll b/bench/linux/optimized/ntp.ll index 25983ce4f22..9e61c78f75a 100644 --- a/bench/linux/optimized/ntp.ll +++ b/bench/linux/optimized/ntp.ll @@ -444,8 +444,8 @@ define dso_local range(i32 0, 6) i32 @__do_adjtimex(ptr noundef captures(none) i %86 = getelementptr inbounds nuw i8, ptr %0, i64 16 %87 = load i64, ptr %86, align 8 %88 = mul i64 %87, 65536000 - %89 = tail call i64 @llvm.smin.i64(i64 %88, i64 2147483648000000) - %90 = tail call i64 @llvm.smax.i64(i64 %89, i64 -2147483648000000) + %89 = tail call i64 @llvm.smax.i64(i64 %88, i64 -2147483648000000) + %90 = tail call i64 @llvm.smin.i64(i64 %89, i64 2147483648000000) store i64 %90, ptr @time_freq, align 8 br label %91 @@ -483,8 +483,8 @@ define dso_local range(i32 0, 6) i32 @__do_adjtimex(ptr noundef captures(none) i %110 = icmp eq i32 %109, 0 %111 = add i64 %108, 4 %spec.select = select i1 %110, i64 %111, i64 %108 - %112 = tail call i64 @llvm.smin.i64(i64 %spec.select, i64 10) - %113 = tail call i64 @llvm.smax.i64(i64 %112, i64 0) + %112 = tail call i64 @llvm.smax.i64(i64 %spec.select, i64 0) + %113 = tail call i64 @llvm.umin.i64(i64 %112, i64 10) store i64 %113, ptr @time_constant, align 8 br label %114 @@ -594,8 +594,8 @@ define dso_local range(i32 0, 6) i32 @__do_adjtimex(ptr noundef captures(none) i %182 = load i64, ptr @time_freq, align 8 %183 = add nsw i64 %182, %168 %184 = add i64 %183, %181 - %185 = tail call i64 @llvm.smin.i64(i64 %184, i64 2147483648000000) - %186 = tail call i64 @llvm.smax.i64(i64 %185, i64 -2147483648000000) + %185 = tail call i64 @llvm.smax.i64(i64 %184, i64 -2147483648000000) + %186 = tail call i64 @llvm.smin.i64(i64 %185, i64 2147483648000000) store i64 %186, ptr @time_freq, align 8 %187 = shl nsw i64 %143, 32 %188 = sdiv i64 %187, 1000 @@ -1003,6 +1003,9 @@ declare i64 @llvm.smin.i64(i64, i64) #8 ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.smax.i64(i64, i64) #8 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.umin.i64(i64, i64) #9 + attributes #0 = { fn_ret_thunk_extern mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(readwrite, argmem: none, inaccessiblemem: none) "min-legal-vector-width"="0" "no-jump-tables"="true" "no-trapping-math"="true" "patchable-function-entry"="0" "patchable-function-prefix"="16" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+retpoline-external-thunk,+retpoline-indirect-branches,+retpoline-indirect-calls,-3dnow,-3dnowa,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sha512,-sm3,-sm4,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" "tune-cpu"="generic" } attributes #1 = { fn_ret_thunk_extern mustprogress nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(read, argmem: none, inaccessiblemem: none) "min-legal-vector-width"="0" "no-jump-tables"="true" "no-trapping-math"="true" "patchable-function-entry"="0" "patchable-function-prefix"="16" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+retpoline-external-thunk,+retpoline-indirect-branches,+retpoline-indirect-calls,-3dnow,-3dnowa,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sha512,-sm3,-sm4,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" "tune-cpu"="generic" } attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } @@ -1012,8 +1015,9 @@ attributes #5 = { cold fn_ret_thunk_extern nounwind null_pointer_is_valid optsiz attributes #6 = { null_pointer_is_valid "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+retpoline-external-thunk,+retpoline-indirect-branches,+retpoline-indirect-calls,-3dnow,-3dnowa,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sha512,-sm3,-sm4,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" "tune-cpu"="generic" } attributes #7 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) } attributes #8 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } -attributes #9 = { cold nounwind } -attributes #10 = { nounwind } +attributes #9 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #10 = { cold nounwind } +attributes #11 = { nounwind } !llvm.module.flags = !{!0, !1, !2, !3, !4} diff --git a/bench/llama.cpp/optimized/ggml-quants.ll b/bench/llama.cpp/optimized/ggml-quants.ll index 338c7aa2fbf..b9c1ede5968 100644 --- a/bench/llama.cpp/optimized/ggml-quants.ll +++ b/bench/llama.cpp/optimized/ggml-quants.ll @@ -1462,9 +1462,9 @@ make_qkx2_quants.exit: ; preds = %.loopexit.i, %.preh %218 = fadd float %217, 0x4168000000000000 %219 = bitcast float %218 to i32 %220 = and i32 %219, 8388607 - %221 = tail call i32 @llvm.umin.i32(i32 %220, i32 4194307) - %222 = tail call i32 @llvm.umax.i32(i32 %221, i32 4194304) - %223 = trunc i32 %222 to i8 + %221 = tail call i32 @llvm.usub.sat.i32(i32 %220, i32 4194304) + %222 = tail call i32 @llvm.umin.i32(i32 %221, i32 3) + %223 = trunc nuw nsw i32 %222 to i8 %224 = getelementptr inbounds nuw [256 x i8], ptr %4, i64 0, i64 %213 store i8 %223, ptr %224, align 1, !tbaa !13 %indvars.iv.next168 = add nuw nsw i64 %indvars.iv167, 1 @@ -2459,9 +2459,9 @@ make_qp_quants.exit.us: ; preds = %370, %.preheader191 %449 = fadd float %448, 0x4168000000000000 %450 = bitcast float %449 to i32 %451 = and i32 %450, 8388607 - %452 = tail call i32 @llvm.umin.i32(i32 %451, i32 4194307) - %453 = tail call i32 @llvm.umax.i32(i32 %452, i32 4194304) - %454 = trunc i32 %453 to i8 + %452 = tail call i32 @llvm.usub.sat.i32(i32 %451, i32 4194304) + %453 = tail call i32 @llvm.umin.i32(i32 %452, i32 3) + %454 = trunc nuw nsw i32 %453 to i8 %455 = getelementptr inbounds nuw [256 x i8], ptr %6, i64 0, i64 %444 store i8 %454, ptr %455, align 1, !tbaa !13, !noalias !86 %indvars.iv.next159.i.us = add nuw nsw i64 %indvars.iv158.i.us, 1 @@ -2625,8 +2625,8 @@ define void @quantize_row_q3_K_ref(ptr noalias noundef readonly captures(none) % %30 = fsub float 0x4168000000000000, %29 %31 = bitcast float %30 to i32 %32 = and i32 %31, 8388607 - %33 = tail call i32 @llvm.umin.i32(i32 %32, i32 4194307) - %34 = tail call i32 @llvm.umax.i32(i32 %33, i32 4194300) + %33 = tail call i32 @llvm.umax.i32(i32 %32, i32 4194300) + %34 = tail call i32 @llvm.umin.i32(i32 %33, i32 4194307) %35 = add nsw i32 %34, -4194304 %36 = trunc i32 %34 to i8 %37 = getelementptr inbounds nuw i8, ptr %17, i64 %indvars.iv196.i @@ -2680,8 +2680,8 @@ define void @quantize_row_q3_K_ref(ptr noalias noundef readonly captures(none) % %63 = fadd float %62, 0x4168000000000000 %64 = bitcast float %63 to i32 %65 = and i32 %64, 8388607 - %66 = tail call i32 @llvm.umin.i32(i32 %65, i32 4194307) - %67 = tail call i32 @llvm.umax.i32(i32 %66, i32 4194300) + %66 = tail call i32 @llvm.umax.i32(i32 %65, i32 4194300) + %67 = tail call i32 @llvm.umin.i32(i32 %66, i32 4194307) %68 = add nsw i32 %67, -4194304 %69 = sext i8 %51 to i32 %.not.i = icmp eq i32 %68, %69 @@ -2786,8 +2786,8 @@ make_q3_quants.exit: ; preds = %.preheader.preheade %127 = bitcast float %126 to i32 %sext = shl i32 %127, 24 %128 = ashr exact i32 %sext, 24 - %129 = tail call i32 @llvm.smin.i32(i32 %128, i32 31) - %130 = tail call i32 @llvm.smax.i32(i32 %129, i32 -32) + %129 = tail call i32 @llvm.smax.i32(i32 %128, i32 -32) + %130 = tail call i32 @llvm.smin.i32(i32 %129, i32 31) %131 = icmp samesign ult i64 %indvars.iv147, 8 br i1 %131, label %132, label %136 @@ -2894,8 +2894,8 @@ make_q3_quants.exit: ; preds = %.preheader.preheade %196 = fadd float %195, 0x4168000000000000 %197 = bitcast float %196 to i32 %198 = and i32 %197, 8388607 - %199 = tail call i32 @llvm.umin.i32(i32 %198, i32 4194307) - %200 = tail call i32 @llvm.umax.i32(i32 %199, i32 4194300) + %199 = tail call i32 @llvm.umax.i32(i32 %198, i32 4194300) + %200 = tail call i32 @llvm.umin.i32(i32 %199, i32 4194307) %201 = trunc i32 %200 to i8 %202 = add nsw i8 %201, 4 %203 = getelementptr inbounds nuw [256 x i8], ptr %4, i64 0, i64 %192 @@ -3272,8 +3272,8 @@ define i64 @quantize_q3_K(ptr noalias noundef readonly captures(none) %0, ptr no %54 = fsub float 0x4168000000000000, %53 %55 = bitcast float %54 to i32 %56 = and i32 %55, 8388607 - %57 = tail call i32 @llvm.umin.i32(i32 %56, i32 4194307) - %58 = tail call i32 @llvm.umax.i32(i32 %57, i32 4194300) + %57 = tail call i32 @llvm.umax.i32(i32 %56, i32 4194300) + %58 = tail call i32 @llvm.umin.i32(i32 %57, i32 4194307) %..i.i.us = add nsw i32 %58, -4194304 %59 = trunc i32 %58 to i8 %60 = add nsw i8 %59, 4 @@ -3321,8 +3321,8 @@ define i64 @quantize_q3_K(ptr noalias noundef readonly captures(none) %0, ptr no %84 = fadd float %83, 0x4168000000000000 %85 = bitcast float %84 to i32 %86 = and i32 %85, 8388607 - %87 = tail call i32 @llvm.umin.i32(i32 %86, i32 4194307) - %88 = tail call i32 @llvm.umax.i32(i32 %87, i32 4194300) + %87 = tail call i32 @llvm.umax.i32(i32 %86, i32 4194300) + %88 = tail call i32 @llvm.umin.i32(i32 %87, i32 4194307) %.202.i.i.us = add nsw i32 %88, -4194304 %89 = getelementptr inbounds nuw float, ptr %8, i64 %indvars.iv236.i.i.us %90 = load float, ptr %89, align 4, !tbaa !8, !alias.scope !194, !noalias !200 @@ -3353,8 +3353,8 @@ define i64 @quantize_q3_K(ptr noalias noundef readonly captures(none) %0, ptr no %104 = fadd float %103, 0x4168000000000000 %105 = bitcast float %104 to i32 %106 = and i32 %105, 8388607 - %107 = tail call i32 @llvm.umin.i32(i32 %106, i32 4194307) - %108 = tail call i32 @llvm.umax.i32(i32 %107, i32 4194300) + %107 = tail call i32 @llvm.umax.i32(i32 %106, i32 4194300) + %108 = tail call i32 @llvm.umin.i32(i32 %107, i32 4194307) %109 = trunc i32 %108 to i8 %110 = add nsw i8 %109, 4 %111 = getelementptr inbounds nuw i8, ptr %46, i64 %indvars.iv246.i.i.us @@ -3428,8 +3428,8 @@ make_qx_quants.exit.i.us: ; preds = %115, %.preheader.pr %134 = fsub float 0x4168000000000000, %133 %135 = bitcast float %134 to i32 %136 = and i32 %135, 8388607 - %137 = tail call i32 @llvm.umin.i32(i32 %136, i32 4194335) - %138 = tail call i32 @llvm.umax.i32(i32 %137, i32 4194272) + %137 = tail call i32 @llvm.umax.i32(i32 %136, i32 4194272) + %138 = tail call i32 @llvm.umin.i32(i32 %137, i32 4194335) %..i.us = add nsw i32 %138, -4194304 %139 = trunc i32 %138 to i8 %140 = add nsw i8 %139, 32 @@ -3477,8 +3477,8 @@ make_qx_quants.exit.i.us: ; preds = %115, %.preheader.pr %164 = fadd float %163, 0x4168000000000000 %165 = bitcast float %164 to i32 %166 = and i32 %165, 8388607 - %167 = tail call i32 @llvm.umin.i32(i32 %166, i32 4194335) - %168 = tail call i32 @llvm.umax.i32(i32 %167, i32 4194272) + %167 = tail call i32 @llvm.umax.i32(i32 %166, i32 4194272) + %168 = tail call i32 @llvm.umin.i32(i32 %167, i32 4194335) %.202.i.us = add nsw i32 %168, -4194304 %169 = getelementptr inbounds nuw float, ptr %9, i64 %indvars.iv236.i.us %170 = load float, ptr %169, align 4, !tbaa !8, !alias.scope !212, !noalias !216 @@ -3509,8 +3509,8 @@ make_qx_quants.exit.i.us: ; preds = %115, %.preheader.pr %184 = fadd float %183, 0x4168000000000000 %185 = bitcast float %184 to i32 %186 = and i32 %185, 8388607 - %187 = tail call i32 @llvm.umin.i32(i32 %186, i32 4194335) - %188 = tail call i32 @llvm.umax.i32(i32 %187, i32 4194272) + %187 = tail call i32 @llvm.umax.i32(i32 %186, i32 4194272) + %188 = tail call i32 @llvm.umin.i32(i32 %187, i32 4194335) %189 = trunc i32 %188 to i8 %190 = add nsw i8 %189, 32 %191 = getelementptr inbounds nuw i8, ptr %10, i64 %indvars.iv246.i.us @@ -3661,8 +3661,8 @@ make_qx_quants.exit.us: ; preds = %195, %.preheader.pr %283 = fadd float %282, 0x4168000000000000 %284 = bitcast float %283 to i32 %285 = and i32 %284, 8388607 - %286 = tail call i32 @llvm.umin.i32(i32 %285, i32 4194307) - %287 = tail call i32 @llvm.umax.i32(i32 %286, i32 4194300) + %286 = tail call i32 @llvm.umax.i32(i32 %285, i32 4194300) + %287 = tail call i32 @llvm.umin.i32(i32 %286, i32 4194307) %288 = trunc i32 %287 to i8 %289 = add nsw i8 %288, 4 %290 = getelementptr inbounds nuw [256 x i8], ptr %6, i64 0, i64 %279 @@ -4221,9 +4221,9 @@ get_scale_min_k4.exit: ; preds = %232, %238 %266 = fadd float %265, 0x4168000000000000 %267 = bitcast float %266 to i32 %268 = and i32 %267, 8388607 - %269 = tail call i32 @llvm.umin.i32(i32 %268, i32 4194319) - %270 = tail call i32 @llvm.umax.i32(i32 %269, i32 4194304) - %271 = trunc i32 %270 to i8 + %269 = tail call i32 @llvm.usub.sat.i32(i32 %268, i32 4194304) + %270 = tail call i32 @llvm.umin.i32(i32 %269, i32 15) + %271 = trunc nuw nsw i32 %270 to i8 %272 = getelementptr inbounds nuw [256 x i8], ptr %4, i64 0, i64 %261 store i8 %271, ptr %272, align 1, !tbaa !13 %indvars.iv.next178 = add nuw nsw i64 %indvars.iv177, 1 @@ -5300,9 +5300,9 @@ get_scale_min_k4.exit.i.us: ; preds = %468, %454 %488 = fadd float %487, 0x4168000000000000 %489 = bitcast float %488 to i32 %490 = and i32 %489, 8388607 - %491 = tail call i32 @llvm.umin.i32(i32 %490, i32 4194319) - %492 = tail call i32 @llvm.umax.i32(i32 %491, i32 4194304) - %493 = trunc i32 %492 to i8 + %491 = tail call i32 @llvm.usub.sat.i32(i32 %490, i32 4194304) + %492 = tail call i32 @llvm.umin.i32(i32 %491, i32 15) + %493 = trunc nuw nsw i32 %492 to i8 %494 = getelementptr inbounds nuw [256 x i8], ptr %6, i64 0, i64 %483 store i8 %493, ptr %494, align 1, !tbaa !13, !noalias !279 %indvars.iv.next185.i.us = add nuw nsw i64 %indvars.iv184.i.us, 1 @@ -5818,9 +5818,9 @@ get_scale_min_k4.exit: ; preds = %232, %238 %266 = fadd float %265, 0x4168000000000000 %267 = bitcast float %266 to i32 %268 = and i32 %267, 8388607 - %269 = tail call i32 @llvm.umin.i32(i32 %268, i32 4194335) - %270 = tail call i32 @llvm.umax.i32(i32 %269, i32 4194304) - %271 = trunc i32 %270 to i8 + %269 = tail call i32 @llvm.usub.sat.i32(i32 %268, i32 4194304) + %270 = tail call i32 @llvm.umin.i32(i32 %269, i32 31) + %271 = trunc nuw nsw i32 %270 to i8 %272 = getelementptr inbounds nuw [256 x i8], ptr %4, i64 0, i64 %261 store i8 %271, ptr %272, align 1, !tbaa !13 %indvars.iv.next206 = add nuw nsw i64 %indvars.iv205, 1 @@ -6940,9 +6940,9 @@ get_scale_min_k4.exit.i.us: ; preds = %470, %456 %490 = fadd float %489, 0x4168000000000000 %491 = bitcast float %490 to i32 %492 = and i32 %491, 8388607 - %493 = tail call i32 @llvm.umin.i32(i32 %492, i32 4194335) - %494 = tail call i32 @llvm.umax.i32(i32 %493, i32 4194304) - %495 = trunc i32 %494 to i8 + %493 = tail call i32 @llvm.usub.sat.i32(i32 %492, i32 4194304) + %494 = tail call i32 @llvm.umin.i32(i32 %493, i32 31) + %495 = trunc nuw nsw i32 %494 to i8 %496 = getelementptr inbounds nuw [256 x i8], ptr %6, i64 0, i64 %485 store i8 %495, ptr %496, align 1, !tbaa !13, !noalias !387 %indvars.iv.next215.i.us = add nuw nsw i64 %indvars.iv214.i.us, 1 @@ -7120,8 +7120,8 @@ define void @quantize_row_q6_K_ref(ptr noalias noundef readonly captures(none) % %27 = fsub float 0x4168000000000000, %26 %28 = bitcast float %27 to i32 %29 = and i32 %28, 8388607 - %30 = tail call i32 @llvm.umin.i32(i32 %29, i32 4194335) - %31 = tail call i32 @llvm.umax.i32(i32 %30, i32 4194272) + %30 = tail call i32 @llvm.umax.i32(i32 %29, i32 4194272) + %31 = tail call i32 @llvm.umin.i32(i32 %30, i32 4194335) %..i = add nsw i32 %31, -4194304 %32 = trunc i32 %31 to i8 %33 = add nsw i8 %32, 32 @@ -7168,8 +7168,8 @@ define void @quantize_row_q6_K_ref(ptr noalias noundef readonly captures(none) % %56 = fadd float %55, 0x4168000000000000 %57 = bitcast float %56 to i32 %58 = and i32 %57, 8388607 - %59 = tail call i32 @llvm.umin.i32(i32 %58, i32 4194335) - %60 = tail call i32 @llvm.umax.i32(i32 %59, i32 4194272) + %59 = tail call i32 @llvm.umax.i32(i32 %58, i32 4194272) + %60 = tail call i32 @llvm.umin.i32(i32 %59, i32 4194335) %.202.us.i = add nsw i32 %60, -4194304 %61 = fmul float %54, %54 %62 = fmul float %54, %61 @@ -7204,8 +7204,8 @@ define void @quantize_row_q6_K_ref(ptr noalias noundef readonly captures(none) % %78 = fadd float %77, 0x4168000000000000 %79 = bitcast float %78 to i32 %80 = and i32 %79, 8388607 - %81 = tail call i32 @llvm.umin.i32(i32 %80, i32 4194335) - %82 = tail call i32 @llvm.umax.i32(i32 %81, i32 4194272) + %81 = tail call i32 @llvm.umax.i32(i32 %80, i32 4194272) + %82 = tail call i32 @llvm.umin.i32(i32 %81, i32 4194335) %83 = trunc i32 %82 to i8 %84 = add nsw i8 %83, 32 %85 = getelementptr inbounds nuw i8, ptr %14, i64 %indvars.iv246.i @@ -7319,8 +7319,8 @@ make_qx_quants.exit: ; preds = %86, %.preheader.pre %148 = fadd float %147, 0x4168000000000000 %149 = bitcast float %148 to i32 %150 = and i32 %149, 8388607 - %151 = tail call i32 @llvm.umin.i32(i32 %150, i32 4194335) - %152 = tail call i32 @llvm.umax.i32(i32 %151, i32 4194272) + %151 = tail call i32 @llvm.umax.i32(i32 %150, i32 4194272) + %152 = tail call i32 @llvm.umin.i32(i32 %151, i32 4194335) %153 = trunc i32 %152 to i8 %154 = add nsw i8 %153, 32 %155 = getelementptr inbounds nuw [256 x i8], ptr %4, i64 0, i64 %144 @@ -7580,8 +7580,8 @@ define i64 @quantize_q6_K(ptr noalias noundef readonly captures(none) %0, ptr no %31 = fsub float 0x4168000000000000, %30 %32 = bitcast float %31 to i32 %33 = and i32 %32, 8388607 - %34 = tail call i32 @llvm.umin.i32(i32 %33, i32 4194335) - %35 = tail call i32 @llvm.umax.i32(i32 %34, i32 4194272) + %34 = tail call i32 @llvm.umax.i32(i32 %33, i32 4194272) + %35 = tail call i32 @llvm.umin.i32(i32 %34, i32 4194335) %..i.i.us = add nsw i32 %35, -4194304 %36 = trunc i32 %35 to i8 %37 = add nsw i8 %36, 32 @@ -7629,8 +7629,8 @@ define i64 @quantize_q6_K(ptr noalias noundef readonly captures(none) %0, ptr no %61 = fadd float %60, 0x4168000000000000 %62 = bitcast float %61 to i32 %63 = and i32 %62, 8388607 - %64 = tail call i32 @llvm.umin.i32(i32 %63, i32 4194335) - %65 = tail call i32 @llvm.umax.i32(i32 %64, i32 4194272) + %64 = tail call i32 @llvm.umax.i32(i32 %63, i32 4194272) + %65 = tail call i32 @llvm.umin.i32(i32 %64, i32 4194335) %.202.i.i.us = add nsw i32 %65, -4194304 %66 = getelementptr inbounds nuw float, ptr %22, i64 %indvars.iv236.i.i.us %67 = load float, ptr %66, align 4, !tbaa !8, !alias.scope !471, !noalias !476 @@ -7661,8 +7661,8 @@ define i64 @quantize_q6_K(ptr noalias noundef readonly captures(none) %0, ptr no %81 = fadd float %80, 0x4168000000000000 %82 = bitcast float %81 to i32 %83 = and i32 %82, 8388607 - %84 = tail call i32 @llvm.umin.i32(i32 %83, i32 4194335) - %85 = tail call i32 @llvm.umax.i32(i32 %84, i32 4194272) + %84 = tail call i32 @llvm.umax.i32(i32 %83, i32 4194272) + %85 = tail call i32 @llvm.umin.i32(i32 %84, i32 4194335) %86 = trunc i32 %85 to i8 %87 = add nsw i8 %86, 32 %88 = getelementptr inbounds nuw i8, ptr %23, i64 %indvars.iv246.i.i.us @@ -7780,8 +7780,8 @@ make_qx_quants.exit.i.us: ; preds = %92, %.preheader.pre %152 = fadd float %151, 0x4168000000000000 %153 = bitcast float %152 to i32 %154 = and i32 %153, 8388607 - %155 = tail call i32 @llvm.umin.i32(i32 %154, i32 4194335) - %156 = tail call i32 @llvm.umax.i32(i32 %155, i32 4194272) + %155 = tail call i32 @llvm.umax.i32(i32 %154, i32 4194272) + %156 = tail call i32 @llvm.umin.i32(i32 %155, i32 4194335) %157 = trunc i32 %156 to i8 %158 = add nsw i8 %157, 32 %159 = getelementptr inbounds nuw [256 x i8], ptr %6, i64 0, i64 %148 @@ -8078,8 +8078,8 @@ quantize_row_q4_0_ref.exit: ; preds = %45, %8 %97 = fsub float 0x4168000000000000, %96 %98 = bitcast float %97 to i32 %99 = and i32 %98, 8388607 - %100 = tail call i32 @llvm.umin.i32(i32 %99, i32 4194311) - %101 = tail call i32 @llvm.umax.i32(i32 %100, i32 4194296) + %100 = tail call i32 @llvm.umax.i32(i32 %99, i32 4194296) + %101 = tail call i32 @llvm.umin.i32(i32 %100, i32 4194311) %..i.us.us = add nsw i32 %101, -4194304 %102 = trunc i32 %101 to i8 %103 = add nsw i8 %102, 8 @@ -8127,8 +8127,8 @@ quantize_row_q4_0_ref.exit: ; preds = %45, %8 %127 = fadd float %126, 0x4168000000000000 %128 = bitcast float %127 to i32 %129 = and i32 %128, 8388607 - %130 = tail call i32 @llvm.umin.i32(i32 %129, i32 4194311) - %131 = tail call i32 @llvm.umax.i32(i32 %130, i32 4194296) + %130 = tail call i32 @llvm.umax.i32(i32 %129, i32 4194296) + %131 = tail call i32 @llvm.umin.i32(i32 %130, i32 4194311) %.202.i.us.us = add nsw i32 %131, -4194304 %132 = getelementptr inbounds nuw float, ptr %6, i64 %indvars.iv236.i.us.us %133 = load float, ptr %132, align 4, !tbaa !8, !alias.scope !503, !noalias !507 @@ -8159,8 +8159,8 @@ quantize_row_q4_0_ref.exit: ; preds = %45, %8 %147 = fadd float %146, 0x4168000000000000 %148 = bitcast float %147 to i32 %149 = and i32 %148, 8388607 - %150 = tail call i32 @llvm.umin.i32(i32 %149, i32 4194311) - %151 = tail call i32 @llvm.umax.i32(i32 %150, i32 4194296) + %150 = tail call i32 @llvm.umax.i32(i32 %149, i32 4194296) + %151 = tail call i32 @llvm.umin.i32(i32 %150, i32 4194311) %152 = trunc i32 %151 to i8 %153 = add nsw i8 %152, 8 %154 = getelementptr inbounds nuw i8, ptr %7, i64 %indvars.iv246.i.us.us @@ -8688,8 +8688,8 @@ define i64 @quantize_q5_0(ptr noalias noundef readonly captures(none) %0, ptr no %46 = fsub float 0x4168000000000000, %45 %47 = bitcast float %46 to i32 %48 = and i32 %47, 8388607 - %49 = tail call i32 @llvm.umin.i32(i32 %48, i32 4194319) - %50 = tail call i32 @llvm.umax.i32(i32 %49, i32 4194288) + %49 = tail call i32 @llvm.umax.i32(i32 %48, i32 4194288) + %50 = tail call i32 @llvm.umin.i32(i32 %49, i32 4194319) %..i.i.us.us = add nsw i32 %50, -4194304 %51 = trunc i32 %50 to i8 %52 = add nsw i8 %51, 16 @@ -8737,8 +8737,8 @@ define i64 @quantize_q5_0(ptr noalias noundef readonly captures(none) %0, ptr no %76 = fadd float %75, 0x4168000000000000 %77 = bitcast float %76 to i32 %78 = and i32 %77, 8388607 - %79 = tail call i32 @llvm.umin.i32(i32 %78, i32 4194319) - %80 = tail call i32 @llvm.umax.i32(i32 %79, i32 4194288) + %79 = tail call i32 @llvm.umax.i32(i32 %78, i32 4194288) + %80 = tail call i32 @llvm.umin.i32(i32 %79, i32 4194319) %.202.i.i.us.us = add nsw i32 %80, -4194304 %81 = getelementptr inbounds nuw float, ptr %6, i64 %indvars.iv236.i.i.us.us %82 = load float, ptr %81, align 4, !tbaa !8, !alias.scope !551, !noalias !556 @@ -8769,8 +8769,8 @@ define i64 @quantize_q5_0(ptr noalias noundef readonly captures(none) %0, ptr no %96 = fadd float %95, 0x4168000000000000 %97 = bitcast float %96 to i32 %98 = and i32 %97, 8388607 - %99 = tail call i32 @llvm.umin.i32(i32 %98, i32 4194319) - %100 = tail call i32 @llvm.umax.i32(i32 %99, i32 4194288) + %99 = tail call i32 @llvm.umax.i32(i32 %98, i32 4194288) + %100 = tail call i32 @llvm.umin.i32(i32 %99, i32 4194319) %101 = trunc i32 %100 to i8 %102 = add nsw i8 %101, 16 %103 = getelementptr inbounds nuw i8, ptr %7, i64 %indvars.iv246.i.i.us.us @@ -12021,9 +12021,9 @@ make_qp_quants.exit.i.us: ; preds = %219, %.preheader191 %234 = fadd float %233, 0x4168000000000000 %235 = bitcast float %234 to i32 %236 = and i32 %235, 8388607 - %237 = tail call i32 @llvm.umin.i32(i32 %236, i32 4194306) - %238 = tail call i32 @llvm.umax.i32(i32 %237, i32 4194304) - %239 = trunc i32 %238 to i8 + %237 = tail call i32 @llvm.usub.sat.i32(i32 %236, i32 4194304) + %238 = tail call i32 @llvm.umin.i32(i32 %237, i32 2) + %239 = trunc nuw nsw i32 %238 to i8 %240 = getelementptr inbounds nuw [32 x i8], ptr %10, i64 0, i64 %229 store i8 %239, ptr %240, align 1, !tbaa !13, !noalias !709 %indvars.iv.next459.i.us = add nuw nsw i64 %indvars.iv458.i.us, 1 @@ -12207,12 +12207,12 @@ iq2_find_best_neighbour.exit.i.us: ; preds = %286, %249 %329 = fadd float %328, 0x4168000000000000 %330 = bitcast float %329 to i32 %331 = and i32 %330, 8388607 - %332 = tail call i32 @llvm.umin.i32(i32 %331, i32 4194306) - %333 = tail call i32 @llvm.umax.i32(i32 %332, i32 4194304) + %332 = tail call i32 @llvm.usub.sat.i32(i32 %331, i32 4194304) + %333 = tail call i32 @llvm.umin.i32(i32 %332, i32 2) %indvars.iv474.tr.i.us = trunc i64 %indvars.iv474.i.us to i32 %334 = shl i32 %indvars.iv474.tr.i.us, 1 - %335 = shl i32 %333, %334 - %336 = trunc i32 %335 to i16 + %335 = shl nuw nsw i32 %333, %334 + %336 = trunc nuw i32 %335 to i16 %337 = or i16 %.0287401.i.us, %336 %indvars.iv.next475.i.us = add nuw nsw i64 %indvars.iv474.i.us, 1 %exitcond476.not.i.us = icmp eq i64 %indvars.iv.next475.i.us, 8 @@ -12506,9 +12506,9 @@ iq2_find_best_neighbour.exit355.i.us: ; preds = %375, %.iq2_find_bes %490 = fadd float %489, 0x4168000000000000 %491 = bitcast float %490 to i32 %492 = and i32 %491, 8388607 - %493 = tail call i32 @llvm.umin.i32(i32 %492, i32 4194319) - %494 = tail call i32 @llvm.umax.i32(i32 %493, i32 4194304) - %495 = shl i32 %494, 28 + %493 = tail call i32 @llvm.usub.sat.i32(i32 %492, i32 4194304) + %494 = tail call i32 @llvm.umin.i32(i32 %493, i32 15) + %495 = shl nuw i32 %494, 28 %496 = shl nuw nsw i64 %indvars.iv504.i.us, 1 %497 = or disjoint i64 %496, 1 %498 = getelementptr inbounds nuw [16 x i32], ptr %13, i64 0, i64 %497 @@ -12890,9 +12890,9 @@ define i64 @quantize_iq2_xs(ptr noalias noundef readonly captures(none) %0, ptr %119 = fadd float %118, 0x4168000000000000 %120 = bitcast float %119 to i32 %121 = and i32 %120, 8388607 - %122 = tail call i32 @llvm.umin.i32(i32 %121, i32 4194306) - %123 = tail call i32 @llvm.umax.i32(i32 %122, i32 4194304) - %124 = trunc i32 %123 to i8 + %122 = tail call i32 @llvm.usub.sat.i32(i32 %121, i32 4194304) + %123 = tail call i32 @llvm.umin.i32(i32 %122, i32 2) + %124 = trunc nuw nsw i32 %123 to i8 %125 = getelementptr inbounds nuw [16 x i8], ptr %10, i64 0, i64 %114 store i8 %124, ptr %125, align 1, !tbaa !13, !noalias !789 %indvars.iv.next470.i.us = add nuw nsw i64 %indvars.iv469.i.us, 1 @@ -13102,14 +13102,14 @@ iq2_find_best_neighbour.exit.i.us: ; preds = %168, %134 %216 = fadd float %215, 0x4168000000000000 %217 = bitcast float %216 to i32 %218 = and i32 %217, 8388607 - %219 = tail call i32 @llvm.umin.i32(i32 %218, i32 4194306) - %220 = tail call i32 @llvm.umax.i32(i32 %219, i32 4194304) + %219 = tail call i32 @llvm.usub.sat.i32(i32 %218, i32 4194304) + %220 = tail call i32 @llvm.umin.i32(i32 %219, i32 2) %indvars.iv493.tr.i.us = trunc i64 %indvars.iv493.i.us to i32 %221 = shl i32 %indvars.iv493.tr.i.us, 1 - %222 = shl i32 %220, %221 - %223 = trunc i32 %222 to i16 + %222 = shl nuw nsw i32 %220, %221 + %223 = trunc nuw i32 %222 to i16 %224 = or i16 %.0302418.i.us, %223 - %225 = trunc i32 %220 to i8 + %225 = trunc nuw nsw i32 %220 to i8 %226 = getelementptr inbounds nuw [16 x i8], ptr %9, i64 0, i64 %211 store i8 %225, ptr %226, align 1, !tbaa !13, !noalias !789 %indvars.iv.next494.i.us = add nuw nsw i64 %indvars.iv493.i.us, 1 @@ -13363,8 +13363,8 @@ iq2_find_best_neighbour.exit369.i.us: ; preds = %261, %227, %208 %359 = fadd float %358, 0x4168000000000000 %360 = bitcast float %359 to i32 %361 = and i32 %360, 8388607 - %362 = tail call i32 @llvm.umin.i32(i32 %361, i32 4194319) - %363 = tail call i32 @llvm.umax.i32(i32 %362, i32 4194304) + %362 = tail call i32 @llvm.usub.sat.i32(i32 %361, i32 4194304) + %363 = tail call i32 @llvm.umin.i32(i32 %362, i32 15) %364 = and i64 %indvars.iv516.i.us, 1 %365 = icmp eq i64 %364, 0 br i1 %365, label %373, label %366 @@ -13374,14 +13374,14 @@ iq2_find_best_neighbour.exit369.i.us: ; preds = %261, %227, %208 %368 = and i64 %367, 2147483647 %369 = getelementptr inbounds nuw [8 x i8], ptr %26, i64 0, i64 %368 %370 = load i8, ptr %369, align 1, !tbaa !13, !alias.scope !795, !noalias !798 - %.tr.i.us = trunc i32 %363 to i8 + %.tr.i.us = trunc nuw nsw i32 %363 to i8 %371 = shl nuw i8 %.tr.i.us, 4 %372 = or i8 %370, %371 store i8 %372, ptr %369, align 1, !tbaa !13, !alias.scope !795, !noalias !798 br label %378 373: ; preds = %354 - %374 = trunc i32 %363 to i8 + %374 = trunc nuw nsw i32 %363 to i8 %375 = lshr exact i64 %indvars.iv516.i.us, 1 %376 = and i64 %375, 2147483647 %377 = getelementptr inbounds nuw [8 x i8], ptr %26, i64 0, i64 %376 @@ -14241,9 +14241,9 @@ define internal fastcc void @quantize_row_iq3_xxs_impl(ptr noalias noundef reado %140 = fadd float %139, 0x4168000000000000 %141 = bitcast float %140 to i32 %142 = and i32 %141, 8388607 - %143 = tail call i32 @llvm.umin.i32(i32 %142, i32 4194311) - %144 = tail call i32 @llvm.umax.i32(i32 %143, i32 4194304) - %145 = trunc i32 %144 to i8 + %143 = tail call i32 @llvm.usub.sat.i32(i32 %142, i32 4194304) + %144 = tail call i32 @llvm.umin.i32(i32 %143, i32 7) + %145 = trunc nuw nsw i32 %144 to i8 %146 = getelementptr inbounds nuw [32 x i8], ptr %9, i64 0, i64 %135 store i8 %145, ptr %146, align 1, !tbaa !13 %indvars.iv.next520 = add nuw nsw i64 %indvars.iv519, 1 @@ -14473,12 +14473,12 @@ iq3_find_best_neighbour.exit: ; preds = %196, %147 %254 = fadd float %253, 0x4168000000000000 %255 = bitcast float %254 to i32 %256 = and i32 %255, 8388607 - %257 = tail call i32 @llvm.umin.i32(i32 %256, i32 4194311) - %258 = tail call i32 @llvm.umax.i32(i32 %257, i32 4194304) + %257 = tail call i32 @llvm.usub.sat.i32(i32 %256, i32 4194304) + %258 = tail call i32 @llvm.umin.i32(i32 %257, i32 7) %259 = trunc i64 %indvars.iv545 to i32 %260 = mul i32 %259, 3 - %261 = shl i32 %258, %260 - %262 = trunc i32 %261 to i16 + %261 = shl nuw nsw i32 %258, %260 + %262 = trunc nuw nsw i32 %261 to i16 %263 = or i16 %.0336459, %262 %indvars.iv.next546 = add nuw nsw i64 %indvars.iv545, 1 %exitcond547.not = icmp eq i64 %indvars.iv.next546, 4 @@ -14795,9 +14795,9 @@ iq3_find_best_neighbour.exit406: ; preds = %298, %.iq3_find_bes %426 = fadd float %425, 0x4168000000000000 %427 = bitcast float %426 to i32 %428 = and i32 %427, 8388607 - %429 = tail call i32 @llvm.umin.i32(i32 %428, i32 4194319) - %430 = tail call i32 @llvm.umax.i32(i32 %429, i32 4194304) - %431 = shl i32 %430, 28 + %429 = tail call i32 @llvm.usub.sat.i32(i32 %428, i32 4194304) + %430 = tail call i32 @llvm.umin.i32(i32 %429, i32 15) + %431 = shl nuw i32 %430, 28 %432 = getelementptr inbounds nuw i32, ptr %29, i64 %indvars.iv575 %433 = load i32, ptr %432, align 4, !tbaa !90 %434 = or i32 %431, %433 @@ -15075,9 +15075,9 @@ define i64 @quantize_iq3_s(ptr noalias noundef readonly captures(none) %0, ptr n %102 = fadd float %101, 0x4168000000000000 %103 = bitcast float %102 to i32 %104 = and i32 %103, 8388607 - %105 = tail call i32 @llvm.umin.i32(i32 %104, i32 4194311) - %106 = tail call i32 @llvm.umax.i32(i32 %105, i32 4194304) - %107 = trunc i32 %106 to i8 + %105 = tail call i32 @llvm.usub.sat.i32(i32 %104, i32 4194304) + %106 = tail call i32 @llvm.umin.i32(i32 %105, i32 7) + %107 = trunc nuw nsw i32 %106 to i8 %108 = getelementptr inbounds nuw i8, ptr %10, i64 %97 store i8 %107, ptr %108, align 1, !tbaa !13, !noalias !942 %indvars.iv.next555.i.us = add nuw nsw i64 %indvars.iv554.i.us, 1 @@ -15282,12 +15282,12 @@ iq3_find_best_neighbour.exit.i.us: ; preds = %154, %117 %200 = fadd float %199, 0x4168000000000000 %201 = bitcast float %200 to i32 %202 = and i32 %201, 8388607 - %203 = tail call i32 @llvm.umin.i32(i32 %202, i32 4194311) - %204 = tail call i32 @llvm.umax.i32(i32 %203, i32 4194304) + %203 = tail call i32 @llvm.usub.sat.i32(i32 %202, i32 4194304) + %204 = tail call i32 @llvm.umin.i32(i32 %203, i32 7) %205 = trunc i64 %indvars.iv582.i.us to i32 %206 = mul i32 %205, 3 - %207 = shl i32 %204, %206 - %208 = trunc i32 %207 to i16 + %207 = shl nuw nsw i32 %204, %206 + %208 = trunc nuw nsw i32 %207 to i16 %209 = or i16 %.0380494.i.us, %208 %indvars.iv.next583.i.us = add nuw nsw i64 %indvars.iv582.i.us, 1 %exitcond584.not.i.us = icmp eq i64 %indvars.iv.next583.i.us, 4 @@ -15577,8 +15577,8 @@ iq3_find_best_neighbour.exit443.i.us: ; preds = %246, %.iq3_find_bes %353 = fadd float %352, 0x4168000000000000 %354 = bitcast float %353 to i32 %355 = and i32 %354, 8388607 - %356 = tail call i32 @llvm.umin.i32(i32 %355, i32 4194319) - %357 = tail call i32 @llvm.umax.i32(i32 %356, i32 4194304) + %356 = tail call i32 @llvm.usub.sat.i32(i32 %355, i32 4194304) + %357 = tail call i32 @llvm.umin.i32(i32 %356, i32 15) %358 = getelementptr inbounds nuw i8, ptr %349, i64 4 %359 = load float, ptr %358, align 4, !tbaa !8, !noalias !942 %360 = tail call float @llvm.fmuladd.f32(float %346, float %359, float -1.000000e+00) @@ -15586,11 +15586,11 @@ iq3_find_best_neighbour.exit443.i.us: ; preds = %246, %.iq3_find_bes %362 = fadd float %361, 0x4168000000000000 %363 = bitcast float %362 to i32 %364 = and i32 %363, 8388607 - %365 = tail call i32 @llvm.umin.i32(i32 %364, i32 4194319) - %366 = tail call i32 @llvm.umax.i32(i32 %365, i32 4194304) + %365 = tail call i32 @llvm.usub.sat.i32(i32 %364, i32 4194304) + %366 = tail call i32 @llvm.umin.i32(i32 %365, i32 15) %367 = shl nuw nsw i32 %366, 4 - %368 = add nuw nsw i32 %367, %357 - %369 = trunc i32 %368 to i8 + %368 = or disjoint i32 %367, %357 + %369 = trunc nuw i32 %368 to i8 %370 = lshr exact i64 %indvars.iv616.i.us, 1 %371 = getelementptr inbounds nuw [4 x i8], ptr %347, i64 0, i64 %370 store i8 %369, ptr %371, align 1, !tbaa !13, !alias.scope !948, !noalias !951 @@ -16269,8 +16269,8 @@ define i64 @quantize_iq1_s(ptr noalias noundef readonly captures(none) %0, ptr n %286 = fadd float %285, 0x4168000000000000 %287 = bitcast float %286 to i32 %288 = and i32 %287, 8388607 - %289 = call i32 @llvm.umin.i32(i32 %288, i32 4194311) - %290 = call i32 @llvm.umax.i32(i32 %289, i32 4194304) + %289 = call i32 @llvm.usub.sat.i32(i32 %288, i32 4194304) + %290 = call i32 @llvm.umin.i32(i32 %289, i32 7) %291 = getelementptr inbounds nuw i8, ptr %13, i64 %indvars.iv486.i.us %292 = load i8, ptr %291, align 1, !tbaa !13, !noalias !1021 %293 = icmp eq i8 %292, -1 @@ -16278,7 +16278,7 @@ define i64 @quantize_iq1_s(ptr noalias noundef readonly captures(none) %0, ptr n %spec.select.i.us = select i1 %293, i32 %294, i32 %290 %295 = getelementptr inbounds nuw [8 x i16], ptr %40, i64 0, i64 %indvars.iv486.i.us %296 = load i16, ptr %295, align 2, !tbaa !635, !alias.scope !1016, !noalias !1022 - %.0.tr.i.us = trunc i32 %spec.select.i.us to i16 + %.0.tr.i.us = trunc nuw nsw i32 %spec.select.i.us to i16 %297 = shl nuw i16 %.0.tr.i.us, 12 %298 = or i16 %297, %296 store i16 %298, ptr %295, align 2, !tbaa !635, !alias.scope !1016, !noalias !1022 @@ -17130,17 +17130,17 @@ define i64 @quantize_iq1_m(ptr noalias noundef readonly captures(none) %0, ptr n %382 = fadd float %381, 0x4168000000000000 %383 = bitcast float %382 to i32 %384 = and i32 %383, 8388607 - %385 = call i32 @llvm.umin.i32(i32 %384, i32 4194311) - %386 = call i32 @llvm.umax.i32(i32 %385, i32 4194304) + %385 = call i32 @llvm.usub.sat.i32(i32 %384, i32 4194304) + %386 = call i32 @llvm.umin.i32(i32 %385, i32 7) %387 = trunc nuw nsw i64 %indvars.iv823.i.us to i32 %388 = and i32 %387, 3 %389 = mul nuw nsw i32 %388, 3 - %390 = shl nuw i32 %386, %389 + %390 = shl nuw nsw i32 %386, %389 %391 = lshr i64 %indvars.iv823.i.us, 2 %392 = and i64 %391, 1073741823 %393 = getelementptr inbounds nuw i16, ptr %44, i64 %392 %394 = load i16, ptr %393, align 2, !tbaa !635, !alias.scope !1047, !noalias !1052 - %395 = trunc i32 %390 to i16 + %395 = trunc nuw nsw i32 %390 to i16 %396 = or i16 %394, %395 store i16 %396, ptr %393, align 2, !tbaa !635, !alias.scope !1047, !noalias !1052 %397 = getelementptr inbounds nuw i8, ptr %13, i64 %indvars.iv823.i.us @@ -17190,7 +17190,7 @@ define i64 @quantize_iq1_m(ptr noalias noundef readonly captures(none) %0, ptr n %422 = shl nuw nsw i64 %indvars.iv823.i.us, 1 %423 = zext i8 %404 to i32 %424 = shl nuw nsw i32 %386, 1 - %425 = add nsw i32 %424, -8388607 + %425 = or disjoint i32 %424, 1 %426 = uitofp nneg i32 %425 to float %427 = and i8 %398, 1 %428 = icmp eq i8 %427, 0 @@ -17798,8 +17798,8 @@ best_index_int8.exit265: ; preds = %107, %112, %123 %184 = fadd float %183, 0x4168000000000000 %185 = bitcast float %184 to i32 %186 = and i32 %185, 8388607 - %187 = tail call i32 @llvm.umin.i32(i32 %186, i32 4194335) - %188 = tail call i32 @llvm.umax.i32(i32 %187, i32 4194272) + %187 = tail call i32 @llvm.umax.i32(i32 %186, i32 4194272) + %188 = tail call i32 @llvm.umin.i32(i32 %187, i32 4194335) %189 = add nsw i32 %188, -4194304 %190 = sitofp i32 %189 to float %191 = fmul float %156, %190 @@ -18456,9 +18456,9 @@ define i64 @quantize_iq2_s(ptr noalias noundef readonly captures(none) %0, ptr n %96 = fadd float %95, 0x4168000000000000 %97 = bitcast float %96 to i32 %98 = and i32 %97, 8388607 - %99 = tail call i32 @llvm.umin.i32(i32 %98, i32 4194306) - %100 = tail call i32 @llvm.umax.i32(i32 %99, i32 4194304) - %101 = trunc i32 %100 to i8 + %99 = tail call i32 @llvm.usub.sat.i32(i32 %98, i32 4194304) + %100 = tail call i32 @llvm.umin.i32(i32 %99, i32 2) + %101 = trunc nuw nsw i32 %100 to i8 %102 = getelementptr inbounds nuw [16 x i8], ptr %10, i64 0, i64 %91 store i8 %101, ptr %102, align 1, !tbaa !13, !noalias !1105 %indvars.iv.next447.i.us = add nuw nsw i64 %indvars.iv446.i.us, 1 @@ -18668,14 +18668,14 @@ iq2_find_best_neighbour.exit.i.us: ; preds = %145, %111 %193 = fadd float %192, 0x4168000000000000 %194 = bitcast float %193 to i32 %195 = and i32 %194, 8388607 - %196 = tail call i32 @llvm.umin.i32(i32 %195, i32 4194306) - %197 = tail call i32 @llvm.umax.i32(i32 %196, i32 4194304) + %196 = tail call i32 @llvm.usub.sat.i32(i32 %195, i32 4194304) + %197 = tail call i32 @llvm.umin.i32(i32 %196, i32 2) %indvars.iv470.tr.i.us = trunc i64 %indvars.iv470.i.us to i32 %198 = shl i32 %indvars.iv470.tr.i.us, 1 - %199 = shl i32 %197, %198 - %200 = trunc i32 %199 to i16 + %199 = shl nuw nsw i32 %197, %198 + %200 = trunc nuw i32 %199 to i16 %201 = or i16 %.0288394.i.us, %200 - %202 = trunc i32 %197 to i8 + %202 = trunc nuw nsw i32 %197 to i8 %203 = getelementptr inbounds nuw [16 x i8], ptr %9, i64 0, i64 %188 store i8 %202, ptr %203, align 1, !tbaa !13, !noalias !1105 %indvars.iv.next471.i.us = add nuw nsw i64 %indvars.iv470.i.us, 1 @@ -18935,8 +18935,8 @@ iq2_find_best_neighbour.exit345.i.us: ; preds = %238, %204, %185 %343 = fadd float %342, 0x4168000000000000 %344 = bitcast float %343 to i32 %345 = and i32 %344, 8388607 - %346 = tail call i32 @llvm.umin.i32(i32 %345, i32 4194319) - %347 = tail call i32 @llvm.umax.i32(i32 %346, i32 4194304) + %346 = tail call i32 @llvm.usub.sat.i32(i32 %345, i32 4194304) + %347 = tail call i32 @llvm.umin.i32(i32 %346, i32 15) %348 = and i64 %indvars.iv493.i.us, 1 %349 = icmp eq i64 %348, 0 br i1 %349, label %357, label %350 @@ -18946,14 +18946,14 @@ iq2_find_best_neighbour.exit345.i.us: ; preds = %238, %204, %185 %352 = and i64 %351, 2147483647 %353 = getelementptr inbounds nuw [8 x i8], ptr %337, i64 0, i64 %352 %354 = load i8, ptr %353, align 1, !tbaa !13, !alias.scope !1111, !noalias !1114 - %.tr.i.us = trunc i32 %347 to i8 + %.tr.i.us = trunc nuw nsw i32 %347 to i8 %355 = shl nuw i8 %.tr.i.us, 4 %356 = or i8 %354, %355 store i8 %356, ptr %353, align 1, !tbaa !13, !alias.scope !1111, !noalias !1114 br label %362 357: ; preds = %338 - %358 = trunc i32 %347 to i8 + %358 = trunc nuw nsw i32 %347 to i8 %359 = lshr exact i64 %indvars.iv493.i.us, 1 %360 = and i64 %359, 2147483647 %361 = getelementptr inbounds nuw [8 x i8], ptr %337, i64 0, i64 %360 @@ -20285,6 +20285,9 @@ declare noundef i32 @puts(ptr noundef readonly captures(none)) local_unnamed_add ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) declare void @llvm.experimental.noalias.scope.decl(metadata) #22 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.usub.sat.i32(i32, i32) #20 + attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } attributes #2 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/bench/lodepng/optimized/pngdetail.ll b/bench/lodepng/optimized/pngdetail.ll index 19556f050e5..c501026c925 100644 --- a/bench/lodepng/optimized/pngdetail.ll +++ b/bench/lodepng/optimized/pngdetail.ll @@ -2649,7 +2649,7 @@ _Z11HueToLetteri.exit47: ; preds = %_Z8RGBtoHSLhhhPhS_S br i1 %or.cond5, label %186, label %202 186: ; preds = %_Z11HueToLetteri.exit47.thread, %_Z11HueToLetteri.exit47 - %spec.select110117123 = phi i8 [ %spec.select110114, %_Z11HueToLetteri.exit47.thread ], [ %spec.select110, %_Z11HueToLetteri.exit47 ] + %spec.select110117123 = phi i8 [ %spec.select110114, %_Z11HueToLetteri.exit44.thread ], [ %spec.select110, %_Z11HueToLetteri.exit44 ] %187 = icmp ult i8 %spec.select110117123, 16 br i1 %187, label %_Z17lightnessToLetteri.exit, label %188 diff --git a/bench/luau/optimized/isocline.ll b/bench/luau/optimized/isocline.ll index 063cfc3a832..46c6e91410c 100644 --- a/bench/luau/optimized/isocline.ll +++ b/bench/luau/optimized/isocline.ll @@ -5390,8 +5390,8 @@ ic_get_env.exit.thread8: ; preds = %1, %ic_get_env.exit %8 = phi ptr [ %.pre.i, %ic_get_env.exit ], [ %2, %1 ] %9 = getelementptr inbounds nuw i8, ptr %8, i64 120 %10 = load i64, ptr %9, align 8, !tbaa !163 - %11 = tail call i64 @llvm.smin.i64(i64 %0, i64 5000) - %12 = tail call i64 @llvm.smax.i64(i64 %11, i64 0) + %11 = tail call i64 @llvm.smax.i64(i64 %0, i64 0) + %12 = tail call i64 @llvm.umin.i64(i64 %11, i64 5000) store i64 %12, ptr %9, align 8, !tbaa !163 br label %ic_get_env.exit.thread @@ -5426,12 +5426,12 @@ ic_get_env.exit.thread5: ; preds = %2, %ic_get_env.exit br i1 %12, label %ic_get_env.exit.thread, label %13 13: ; preds = %ic_get_env.exit.thread5 - %14 = tail call i64 @llvm.smin.i64(i64 %0, i64 1000) - %15 = tail call i64 @llvm.smax.i64(i64 %14, i64 0) + %14 = tail call i64 @llvm.smax.i64(i64 %0, i64 0) + %15 = tail call i64 @llvm.umin.i64(i64 %14, i64 1000) %16 = getelementptr inbounds nuw i8, ptr %11, i64 192 store i64 %15, ptr %16, align 8, !tbaa !164 - %17 = tail call i64 @llvm.smin.i64(i64 %1, i64 1000) - %18 = tail call i64 @llvm.smax.i64(i64 %17, i64 0) + %17 = tail call i64 @llvm.smax.i64(i64 %1, i64 0) + %18 = tail call i64 @llvm.umin.i64(i64 %17, i64 1000) %19 = getelementptr inbounds nuw i8, ptr %11, i64 200 store i64 %18, ptr %19, align 8, !tbaa !165 br label %ic_get_env.exit.thread @@ -9430,15 +9430,15 @@ color_from_ansi256.exit: ; preds = %100, %105, %108 %133 = load i64, ptr %6, align 8, !tbaa !57 %134 = load i64, ptr %7, align 8, !tbaa !57 %135 = load i64, ptr %8, align 8, !tbaa !57 - %136 = call i64 @llvm.smin.i64(i64 %133, i64 255) - %137 = call i64 @llvm.smax.i64(i64 %136, i64 0) + %136 = call i64 @llvm.smax.i64(i64 %133, i64 0) + %137 = call i64 @llvm.umin.i64(i64 %136, i64 255) %138 = shl nuw nsw i64 %137, 16 - %139 = call i64 @llvm.smin.i64(i64 %134, i64 255) - %140 = call i64 @llvm.smax.i64(i64 %139, i64 0) + %139 = call i64 @llvm.smax.i64(i64 %134, i64 0) + %140 = call i64 @llvm.umin.i64(i64 %139, i64 255) %141 = shl nuw nsw i64 %140, 8 %142 = or disjoint i64 %141, %138 - %143 = call i64 @llvm.smin.i64(i64 %135, i64 255) - %144 = call i64 @llvm.smax.i64(i64 %143, i64 0) + %143 = call i64 @llvm.smax.i64(i64 %135, i64 0) + %144 = call i64 @llvm.umin.i64(i64 %143, i64 255) %145 = or disjoint i64 %142, %144 %146 = or disjoint i64 %145, 16777216 %147 = load i64, ptr %4, align 8, !tbaa !57 diff --git a/bench/lvgl/optimized/lv_anim.ll b/bench/lvgl/optimized/lv_anim.ll index 92b199605d3..02e1d2cf8b0 100644 --- a/bench/lvgl/optimized/lv_anim.ll +++ b/bench/lvgl/optimized/lv_anim.ll @@ -1289,8 +1289,8 @@ define i32 @lv_anim_path_bounce(ptr noundef readonly captures(none) %0) local_un 38: ; preds = %18, %29, %35, %33, %24, %13 %.046 = phi i32 [ %11, %13 ], [ %21, %18 ], [ %26, %24 ], [ %32, %29 ], [ %37, %35 ], [ %11, %33 ] %.0 = phi i32 [ %narrow, %13 ], [ %20, %18 ], [ %25, %24 ], [ %31, %29 ], [ %36, %35 ], [ %6, %33 ] - %spec.store.select = tail call i32 @llvm.smin.i32(i32 %.0, i32 1024) - %spec.store.select9 = tail call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0) + %spec.store.select = tail call i32 @llvm.smax.i32(i32 %.0, i32 0) + %spec.store.select9 = tail call i32 @llvm.umin.i32(i32 %spec.store.select, i32 1024) %39 = tail call i32 @lv_bezier3(i32 noundef %spec.store.select9, i32 noundef 0, i32 noundef 500, i32 noundef 800, i32 noundef 1024) #10 %40 = mul nsw i32 %39, %.046 %41 = ashr i32 %40, 10 @@ -1707,9 +1707,6 @@ declare i32 @lv_cubic_bezier(i32 noundef, i32 noundef, i32 noundef, i32 noundef, ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.umax.i32(i32, i32) #9 -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smin.i32(i32, i32) #9 - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #9 diff --git a/bench/lvgl/optimized/lv_roller.ll b/bench/lvgl/optimized/lv_roller.ll index 3177a035610..1ff3764a620 100644 --- a/bench/lvgl/optimized/lv_roller.ll +++ b/bench/lvgl/optimized/lv_roller.ll @@ -831,8 +831,8 @@ define void @lv_roller_set_options(ptr noundef %0, ptr noundef %1, i32 noundef % %34 = add nsw i32 %31, %.sroa.0.0.extract.trunc.i %35 = mul i32 %34, %30 %36 = sdiv i32 1000, %35 - %spec.select67 = tail call i32 @llvm.smin.i32(i32 %36, i32 15) - %37 = tail call i32 @llvm.smax.i32(i32 %spec.select67, i32 3) + %spec.select67 = tail call i32 @llvm.smax.i32(i32 %36, i32 3) + %37 = tail call i32 @llvm.umin.i32(i32 %spec.select67, i32 15) %38 = getelementptr inbounds nuw i8, ptr %0, i64 76 %spec.select68 = or i32 %37, 1 store i32 %spec.select68, ptr %38, align 4, !tbaa !42 @@ -1827,6 +1827,9 @@ declare i32 @llvm.abs.i32(i32, i1 immarg) #5 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smin.i32(i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #5 + attributes #0 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } diff --git a/bench/meshlab/optimized/gltf_loader.ll b/bench/meshlab/optimized/gltf_loader.ll index bf12aad5443..6242ded1316 100644 --- a/bench/meshlab/optimized/gltf_loader.ll +++ b/bench/meshlab/optimized/gltf_loader.ll @@ -40006,8 +40006,8 @@ define internal fastcc noundef range(i32 0, 2) i32 @_ZL19stbi_write_jpg_coreP19s 30: ; preds = %6 %.not = icmp eq i32 %5, 0 - %31 = tail call i32 @llvm.smin.i32(i32 %5, i32 100) - %32 = tail call i32 @llvm.smax.i32(i32 %31, i32 1) + %31 = tail call i32 @llvm.smax.i32(i32 %5, i32 1) + %32 = tail call i32 @llvm.umin.i32(i32 %31, i32 100) %33 = select i1 %.not, i32 90, i32 %32 %34 = icmp samesign ult i32 %33, 50 br i1 %34, label %35, label %37 @@ -40034,8 +40034,8 @@ define internal fastcc noundef range(i32 0, 2) i32 @_ZL19stbi_write_jpg_coreP19s %45 = mul nsw i32 %44, %41 %46 = add nsw i32 %45, 50 %47 = sdiv i32 %46, 100 - %48 = tail call i32 @llvm.smin.i32(i32 %47, i32 255) - %49 = tail call i32 @llvm.smax.i32(i32 %48, i32 1) + %48 = tail call i32 @llvm.smax.i32(i32 %47, i32 1) + %49 = tail call i32 @llvm.umin.i32(i32 %48, i32 255) %50 = trunc nuw i32 %49 to i8 %51 = getelementptr inbounds nuw [64 x i8], ptr @_ZL17stbiw__jpg_ZigZag, i64 0, i64 %indvars.iv %52 = load i8, ptr %51, align 1 @@ -40047,8 +40047,8 @@ define internal fastcc noundef range(i32 0, 2) i32 @_ZL19stbi_write_jpg_coreP19s %57 = mul nsw i32 %56, %41 %58 = add nsw i32 %57, 50 %59 = sdiv i32 %58, 100 - %60 = tail call i32 @llvm.smin.i32(i32 %59, i32 255) - %61 = tail call i32 @llvm.smax.i32(i32 %60, i32 1) + %60 = tail call i32 @llvm.smax.i32(i32 %59, i32 1) + %61 = tail call i32 @llvm.umin.i32(i32 %60, i32 255) %62 = trunc nuw i32 %61 to i8 %63 = getelementptr inbounds nuw [64 x i8], ptr %18, i64 0, i64 %53 store i8 %62, ptr %63, align 1 diff --git a/bench/meshlab/optimized/mainwindow_RunTime.ll b/bench/meshlab/optimized/mainwindow_RunTime.ll index cd30ab89215..2af8edfa968 100644 --- a/bench/meshlab/optimized/mainwindow_RunTime.ll +++ b/bench/meshlab/optimized/mainwindow_RunTime.ll @@ -1264,9 +1264,9 @@ _ZN7QStringD2Ev.exit: ; preds = %40, %_ZN9QtPrivate8 br i1 %51, label %.lr.ph, label %.lr.ph78 .lr.ph: ; preds = %_ZN7QStringD2Ev.exit - %52 = call i32 @llvm.umin.i32(i32 %50, i32 9) - %53 = getelementptr inbounds nuw i8, ptr %0, i64 704 - %wide.trip.count = zext nneg i32 %52 to i64 + %52 = getelementptr inbounds nuw i8, ptr %0, i64 704 + %53 = call i32 @llvm.umin.i32(i32 %50, i32 9) + %wide.trip.count = zext nneg i32 %53 to i64 br label %57 .preheader: ; preds = %_ZN7QStringD2Ev.exit69 @@ -1391,7 +1391,7 @@ _ZN9QtPrivate8RefCount5derefEv.exit.thread2.i55: ; preds = %_ZN9QtPrivate8RefCo br label %_ZN7QStringD2Ev.exit60 _ZN7QStringD2Ev.exit60: ; preds = %_ZN7QStringD2Ev.exit54, %_ZN9QtPrivate8RefCount5derefEv.exit.i56, %_ZN9QtPrivate8RefCount5derefEv.exit.thread2.i55 - %87 = getelementptr inbounds nuw [9 x ptr], ptr %53, i64 0, i64 %indvars.iv + %87 = getelementptr inbounds nuw [9 x ptr], ptr %52, i64 0, i64 %indvars.iv %88 = load ptr, ptr %87, align 8 invoke void @_ZN7QAction7setTextERK7QString(ptr noundef nonnull align 8 dereferenceable(16) %88, ptr noundef nonnull align 8 dereferenceable(8) %8) to label %89 unwind label %133 @@ -2035,9 +2035,9 @@ _ZN7QStringD2Ev.exit: ; preds = %17, %_ZN9QtPrivate8 br i1 %28, label %.lr.ph, label %.lr.ph70 .lr.ph: ; preds = %_ZN7QStringD2Ev.exit - %29 = call i32 @llvm.umin.i32(i32 %27, i32 9) - %30 = getelementptr inbounds nuw i8, ptr %0, i64 776 - %wide.trip.count = zext nneg i32 %29 to i64 + %29 = getelementptr inbounds nuw i8, ptr %0, i64 776 + %30 = call i32 @llvm.umin.i32(i32 %27, i32 9) + %wide.trip.count = zext nneg i32 %30 to i64 br label %34 .preheader: ; preds = %_ZN7QStringD2Ev.exit61 @@ -2162,7 +2162,7 @@ _ZN9QtPrivate8RefCount5derefEv.exit.thread2.i47: ; preds = %_ZN9QtPrivate8RefCo br label %_ZN7QStringD2Ev.exit52 _ZN7QStringD2Ev.exit52: ; preds = %_ZN7QStringD2Ev.exit46, %_ZN9QtPrivate8RefCount5derefEv.exit.i48, %_ZN9QtPrivate8RefCount5derefEv.exit.thread2.i47 - %64 = getelementptr inbounds nuw [9 x ptr], ptr %30, i64 0, i64 %indvars.iv + %64 = getelementptr inbounds nuw [9 x ptr], ptr %29, i64 0, i64 %indvars.iv %65 = load ptr, ptr %64, align 8 invoke void @_ZN7QAction7setTextERK7QString(ptr noundef nonnull align 8 dereferenceable(16) %65, ptr noundef nonnull align 8 dereferenceable(8) %7) to label %66 unwind label %108 diff --git a/bench/minetest/optimized/l_env.ll b/bench/minetest/optimized/l_env.ll index 28f6a2c0489..f7f640ea451 100644 --- a/bench/minetest/optimized/l_env.ll +++ b/bench/minetest/optimized/l_env.ll @@ -5865,9 +5865,9 @@ for.cond9.preheader.i.i.i: ; preds = %for.cond9.for.inc23 %cond98.i.i.i = call i32 @llvm.smax.i32(i32 %cond96.i.i.i, i32 0) %conv99.i.i.i = trunc nuw nsw i32 %cond98.i.i.i to i16 %sub156.i.i.i = sub nsw i32 %conv.i13.i.i320.i.i.i, %conv77.i.i.i - %cond174.i.i.i = call i32 @llvm.smin.i32(i32 %sub156.i.i.i, i32 15) - %cond176.i.i.i = call i32 @llvm.smax.i32(i32 %cond174.i.i.i, i32 0) - %cmp181.not372.i.i.i = icmp sgt i32 %cond96.i.i.i, %cond176.i.i.i + %cond174.i.i.i = call i32 @llvm.smax.i32(i32 %sub156.i.i.i, i32 0) + %cond176.i.i.i = call i32 @llvm.umin.i32(i32 %cond174.i.i.i, i32 15) + %cmp181.not372.i.i.i = icmp slt i32 %47, %cond96.i.i.i %cmp181.not372.fr.i.i.i = freeze i1 %cmp181.not372.i.i.i br i1 %cmp181.not372.fr.i.i.i, label %for.cond16.preheader.us385.i.i.i, label %for.cond16.preheader.i.i.i @@ -5907,9 +5907,9 @@ for.cond16.preheader.i.i.i: ; preds = %for.cond9.preheader %cond46.i.i.i = call i32 @llvm.smax.i32(i32 %cond.i.i.i, i32 0) %conv47.i.i.i = trunc nuw nsw i32 %cond46.i.i.i to i16 %sub104.i.i.i = sub nsw i32 %conv.i.i.i309.i.i.i, %conv30.i.i.i - %cond122.i.i.i = call i32 @llvm.smin.i32(i32 %sub104.i.i.i, i32 15) - %cond124.i.i.i = call i32 @llvm.smax.i32(i32 %cond122.i.i.i, i32 0) - %cmp193.not366.i.i.i = icmp sgt i32 %cond.i.i.i, %cond124.i.i.i + %cond122.i.i.i = call i32 @llvm.smax.i32(i32 %sub104.i.i.i, i32 0) + %cond124.i.i.i = call i32 @llvm.umin.i32(i32 %cond122.i.i.i, i32 15) + %cmp193.not366.i.i.i = icmp slt i32 %cond122.i.i.i, %cond.i.i.i %cmp193.not366.i.fr.i.i = freeze i1 %cmp193.not366.i.i.i %invariant.op133 = or disjoint i48 %bp.sroa.7.0.insert.shift.i.i.i, %bp.sroa.0.0.insert.ext.i.i.i br i1 %cmp193.not366.i.fr.i.i, label %for.body22.i.us.i.i, label %for.body22.i.i.i @@ -5944,9 +5944,9 @@ call24.i.i.noexc.i: ; preds = %for.body22.i.i.i %cond72.i.i.i = call i32 @llvm.smax.i32(i32 %cond70.i.i.i, i32 0) %conv73.i.i.i = trunc nuw nsw i32 %cond72.i.i.i to i16 %sub130.i.i.i = sub nsw i32 %conv127.i.i.i, %conv51.i.i.i - %cond148.i.i.i = call i32 @llvm.smin.i32(i32 %sub130.i.i.i, i32 15) - %cond150.i.i.i = call i32 @llvm.smax.i32(i32 %cond148.i.i.i, i32 0) - %cmp187.not369.i.i.i = icmp sgt i32 %cond70.i.i.i, %cond150.i.i.i + %cond148.i.i.i = call i32 @llvm.smax.i32(i32 %sub130.i.i.i, i32 0) + %cond150.i.i.i = call i32 @llvm.umin.i32(i32 %cond148.i.i.i, i32 15) + %cmp187.not369.i.i.i = icmp slt i32 %51, %cond70.i.i.i %tobool.not.i.i99.i = icmp eq ptr %call24.i.i102.i, null %data.i.i.i.i = getelementptr inbounds nuw i8, ptr %call24.i.i102.i, i64 24 br i1 %cmp187.not369.i.i.i, label %for.cond178.cleanup215_crit_edge.i.i.i, label %for.cond184.preheader.i.i.i @@ -6384,9 +6384,9 @@ for.cond9.preheader.i.i205.i: ; preds = %for.cond9.for.inc23 %cond98.i.i212.i = call i32 @llvm.smax.i32(i32 %cond96.i.i211.i, i32 0) %conv99.i.i213.i = trunc nuw nsw i32 %cond98.i.i212.i to i16 %sub156.i.i214.i = sub nsw i32 %conv.i13.i.i320.i.i187.i, %conv77.i.i209.i - %cond174.i.i215.i = call i32 @llvm.smin.i32(i32 %sub156.i.i214.i, i32 15) - %cond176.i.i216.i = call i32 @llvm.smax.i32(i32 %cond174.i.i215.i, i32 0) - %cmp181.not370.i.i.i = icmp sgt i32 %cond96.i.i211.i, %cond176.i.i216.i + %cond174.i.i215.i = call i32 @llvm.smax.i32(i32 %sub156.i.i214.i, i32 0) + %cond176.i.i216.i = call i32 @llvm.umin.i32(i32 %cond174.i.i215.i, i32 15) + %cmp181.not370.i.i.i = icmp slt i32 %87, %cond96.i.i211.i %cmp181.not370.fr.i.i.i = freeze i1 %cmp181.not370.i.i.i br i1 %cmp181.not370.fr.i.i.i, label %for.cond16.preheader.us383.i.i.i, label %for.cond16.preheader.i.i217.i @@ -6427,9 +6427,9 @@ for.cond16.preheader.i.i217.i: ; preds = %for.cond9.preheader %cond46.i.i223.i = call i32 @llvm.smax.i32(i32 %cond.i.i222.i, i32 0) %conv47.i.i224.i = trunc nuw nsw i32 %cond46.i.i223.i to i16 %sub104.i.i225.i = sub nsw i32 %conv.i.i.i309.i.i176.i, %conv30.i.i220.i - %cond122.i.i226.i = call i32 @llvm.smin.i32(i32 %sub104.i.i225.i, i32 15) - %cond124.i.i227.i = call i32 @llvm.smax.i32(i32 %cond122.i.i226.i, i32 0) - %cmp193.not364.i.i.i = icmp sgt i32 %cond.i.i222.i, %cond124.i.i227.i + %cond122.i.i226.i = call i32 @llvm.smax.i32(i32 %sub104.i.i225.i, i32 0) + %cond124.i.i227.i = call i32 @llvm.umin.i32(i32 %cond122.i.i226.i, i32 15) + %cmp193.not364.i.i.i = icmp slt i32 %89, %cond.i.i222.i %cmp193.not364.i.fr.i.i = freeze i1 %cmp193.not364.i.i.i %invariant.op122 = or disjoint i48 %bp.sroa.7.0.insert.shift.i.i207.i, %bp.sroa.0.0.insert.ext.i.i218.i br i1 %cmp193.not364.i.fr.i.i, label %for.body22.i.us.i352.i, label %for.body22.i.i228.i @@ -6465,9 +6465,9 @@ call24.i.i.noexc372.i: ; preds = %for.body22.i.i228.i %cond72.i.i236.i = call i32 @llvm.smax.i32(i32 %cond70.i.i235.i, i32 0) %conv73.i.i237.i = trunc nuw nsw i32 %cond72.i.i236.i to i16 %sub130.i.i238.i = sub nsw i32 %conv127.i.i200.i, %conv51.i.i233.i - %cond148.i.i239.i = call i32 @llvm.smin.i32(i32 %sub130.i.i238.i, i32 15) - %cond150.i.i240.i = call i32 @llvm.smax.i32(i32 %cond148.i.i239.i, i32 0) - %cmp187.not367.i.i.i = icmp sgt i32 %cond70.i.i235.i, %cond150.i.i240.i + %cond148.i.i239.i = call i32 @llvm.smax.i32(i32 %sub130.i.i238.i, i32 0) + %cond150.i.i240.i = call i32 @llvm.umin.i32(i32 %cond148.i.i239.i, i32 15) + %cmp187.not367.i.i.i = icmp slt i32 %91, %cond70.i.i235.i %tobool.not.i.i241.i = icmp eq ptr %call24.i.i373.i, null %data.i.i.i242.i = getelementptr inbounds nuw i8, ptr %call24.i.i373.i, i64 24 br i1 %cmp187.not367.i.i.i, label %for.cond178.cleanup215_crit_edge.i.i303.i, label %for.cond184.preheader.i.i243.i diff --git a/bench/miniaudio/optimized/unity.ll b/bench/miniaudio/optimized/unity.ll index cc4c7036419..6d2a99b5cfd 100644 --- a/bench/miniaudio/optimized/unity.ll +++ b/bench/miniaudio/optimized/unity.ll @@ -13764,8 +13764,8 @@ define void @ma_clip_samples_u8(ptr noundef writeonly captures(none) %0, ptr nou %.06 = phi i64 [ %11, %.lr.ph ], [ 0, %3 ] %4 = getelementptr inbounds nuw i16, ptr %1, i64 %.06 %5 = load i16, ptr %4, align 2, !tbaa !334 - %6 = tail call i16 @llvm.smin.i16(i16 %5, i16 127) - %7 = tail call i16 @llvm.smax.i16(i16 %6, i16 -128) + %6 = tail call i16 @llvm.smax.i16(i16 %5, i16 -128) + %7 = tail call i16 @llvm.smin.i16(i16 %6, i16 127) %8 = trunc nsw i16 %7 to i8 %9 = xor i8 %8, -128 %10 = getelementptr inbounds nuw i8, ptr %0, i64 %.06 @@ -13787,8 +13787,8 @@ define void @ma_clip_samples_s16(ptr noundef writeonly captures(none) %0, ptr no %.06 = phi i64 [ %10, %.lr.ph ], [ 0, %3 ] %4 = getelementptr inbounds nuw i32, ptr %1, i64 %.06 %5 = load i32, ptr %4, align 4, !tbaa !3 - %6 = tail call i32 @llvm.smin.i32(i32 %5, i32 32767) - %7 = tail call i32 @llvm.smax.i32(i32 %6, i32 -32768) + %6 = tail call i32 @llvm.smax.i32(i32 %5, i32 -32768) + %7 = tail call i32 @llvm.smin.i32(i32 %6, i32 32767) %8 = trunc nsw i32 %7 to i16 %9 = getelementptr inbounds nuw i16, ptr %0, i64 %.06 store i16 %8, ptr %9, align 2, !tbaa !334 @@ -13809,8 +13809,8 @@ define void @ma_clip_samples_s24(ptr noundef writeonly captures(none) %0, ptr no %.013 = phi i64 [ %17, %.lr.ph ], [ 0, %3 ] %4 = getelementptr inbounds nuw i64, ptr %1, i64 %.013 %5 = load i64, ptr %4, align 8, !tbaa !63 - %6 = tail call i64 @llvm.smin.i64(i64 %5, i64 8388607) - %7 = tail call i64 @llvm.smax.i64(i64 %6, i64 -8388608) + %6 = tail call i64 @llvm.smax.i64(i64 %5, i64 -8388608) + %7 = tail call i64 @llvm.smin.i64(i64 %6, i64 8388607) %8 = trunc i64 %7 to i8 %9 = mul i64 %.013, 3 %10 = getelementptr inbounds nuw i8, ptr %0, i64 %9 @@ -13840,8 +13840,8 @@ define void @ma_clip_samples_s32(ptr noundef writeonly captures(none) %0, ptr no %.06 = phi i64 [ %10, %.lr.ph ], [ 0, %3 ] %4 = getelementptr inbounds nuw i64, ptr %1, i64 %.06 %5 = load i64, ptr %4, align 8, !tbaa !63 - %6 = tail call i64 @llvm.smin.i64(i64 %5, i64 2147483647) - %7 = tail call i64 @llvm.smax.i64(i64 %6, i64 -2147483648) + %6 = tail call i64 @llvm.smax.i64(i64 %5, i64 -2147483648) + %7 = tail call i64 @llvm.smin.i64(i64 %6, i64 2147483647) %8 = trunc nsw i64 %7 to i32 %9 = getelementptr inbounds nuw i32, ptr %0, i64 %.06 store i32 %8, ptr %9, align 4, !tbaa !3 @@ -13896,8 +13896,8 @@ define void @ma_clip_pcm_frames(ptr noundef writeonly captures(none) %0, ptr nou %.06.i = phi i64 [ %16, %.lr.ph.i ], [ 0, %8 ] %9 = getelementptr inbounds nuw i16, ptr %1, i64 %.06.i %10 = load i16, ptr %9, align 2, !tbaa !334 - %11 = tail call i16 @llvm.smin.i16(i16 %10, i16 127) - %12 = tail call i16 @llvm.smax.i16(i16 %11, i16 -128) + %11 = tail call i16 @llvm.smax.i16(i16 %10, i16 -128) + %12 = tail call i16 @llvm.smin.i16(i16 %11, i16 127) %13 = trunc nsw i16 %12 to i8 %14 = xor i8 %13, -128 %15 = getelementptr inbounds nuw i8, ptr %0, i64 %.06.i @@ -13914,8 +13914,8 @@ define void @ma_clip_pcm_frames(ptr noundef writeonly captures(none) %0, ptr nou %.06.i19 = phi i64 [ %24, %.lr.ph.i18 ], [ 0, %17 ] %18 = getelementptr inbounds nuw i32, ptr %1, i64 %.06.i19 %19 = load i32, ptr %18, align 4, !tbaa !3 - %20 = tail call i32 @llvm.smin.i32(i32 %19, i32 32767) - %21 = tail call i32 @llvm.smax.i32(i32 %20, i32 -32768) + %20 = tail call i32 @llvm.smax.i32(i32 %19, i32 -32768) + %21 = tail call i32 @llvm.smin.i32(i32 %20, i32 32767) %22 = trunc nsw i32 %21 to i16 %23 = getelementptr inbounds nuw i16, ptr %0, i64 %.06.i19 store i16 %22, ptr %23, align 2, !tbaa !334 @@ -13931,8 +13931,8 @@ define void @ma_clip_pcm_frames(ptr noundef writeonly captures(none) %0, ptr nou %.013.i = phi i64 [ %39, %.lr.ph.i22 ], [ 0, %25 ] %26 = getelementptr inbounds nuw i64, ptr %1, i64 %.013.i %27 = load i64, ptr %26, align 8, !tbaa !63 - %28 = tail call i64 @llvm.smin.i64(i64 %27, i64 8388607) - %29 = tail call i64 @llvm.smax.i64(i64 %28, i64 -8388608) + %28 = tail call i64 @llvm.smax.i64(i64 %27, i64 -8388608) + %29 = tail call i64 @llvm.smin.i64(i64 %28, i64 8388607) %30 = trunc i64 %29 to i8 %31 = mul i64 %.013.i, 3 %32 = getelementptr inbounds nuw i8, ptr %0, i64 %31 @@ -13957,8 +13957,8 @@ define void @ma_clip_pcm_frames(ptr noundef writeonly captures(none) %0, ptr nou %.06.i26 = phi i64 [ %47, %.lr.ph.i25 ], [ 0, %40 ] %41 = getelementptr inbounds nuw i64, ptr %1, i64 %.06.i26 %42 = load i64, ptr %41, align 8, !tbaa !63 - %43 = tail call i64 @llvm.smin.i64(i64 %42, i64 2147483647) - %44 = tail call i64 @llvm.smax.i64(i64 %43, i64 -2147483648) + %43 = tail call i64 @llvm.smax.i64(i64 %42, i64 -2147483648) + %44 = tail call i64 @llvm.smin.i64(i64 %43, i64 2147483647) %45 = trunc nsw i64 %44 to i32 %46 = getelementptr inbounds nuw i32, ptr %0, i64 %.06.i26 store i32 %45, ptr %46, align 4, !tbaa !3 @@ -14877,8 +14877,8 @@ define void @ma_copy_and_apply_volume_and_clip_samples_u8(ptr noundef writeonly %12 = sext i16 %11 to i32 %13 = mul i32 %8, %12 %14 = ashr i32 %13, 16 - %15 = tail call i32 @llvm.smin.i32(i32 %14, i32 127) - %16 = tail call i32 @llvm.smax.i32(i32 %15, i32 -128) + %15 = tail call i32 @llvm.smax.i32(i32 %14, i32 -128) + %16 = tail call i32 @llvm.smin.i32(i32 %15, i32 127) %17 = trunc nsw i32 %16 to i8 %18 = xor i8 %17, -128 %19 = getelementptr inbounds nuw i8, ptr %0, i64 %.08 @@ -14908,8 +14908,8 @@ define void @ma_copy_and_apply_volume_and_clip_samples_s16(ptr noundef writeonly %10 = load i32, ptr %9, align 4, !tbaa !3 %11 = mul nsw i32 %10, %7 %12 = ashr i32 %11, 8 - %13 = tail call i32 @llvm.smin.i32(i32 %12, i32 32767) - %14 = tail call i32 @llvm.smax.i32(i32 %13, i32 -32768) + %13 = tail call i32 @llvm.smax.i32(i32 %12, i32 -32768) + %14 = tail call i32 @llvm.smin.i32(i32 %13, i32 32767) %15 = trunc nsw i32 %14 to i16 %16 = getelementptr inbounds nuw i16, ptr %0, i64 %.08 store i16 %15, ptr %16, align 2, !tbaa !334 @@ -14938,8 +14938,8 @@ define void @ma_copy_and_apply_volume_and_clip_samples_s24(ptr noundef writeonly %10 = load i64, ptr %9, align 8, !tbaa !63 %11 = mul nsw i64 %10, %7 %12 = ashr i64 %11, 8 - %13 = tail call i64 @llvm.smin.i64(i64 %12, i64 8388607) - %14 = tail call i64 @llvm.smax.i64(i64 %13, i64 -8388608) + %13 = tail call i64 @llvm.smax.i64(i64 %12, i64 -8388608) + %14 = tail call i64 @llvm.smin.i64(i64 %13, i64 8388607) %15 = trunc i64 %14 to i8 %16 = mul i64 %.015, 3 %17 = getelementptr inbounds nuw i8, ptr %0, i64 %16 @@ -14977,8 +14977,8 @@ define void @ma_copy_and_apply_volume_and_clip_samples_s32(ptr noundef writeonly %10 = load i64, ptr %9, align 8, !tbaa !63 %11 = mul nsw i64 %10, %7 %12 = ashr i64 %11, 8 - %13 = tail call i64 @llvm.smin.i64(i64 %12, i64 2147483647) - %14 = tail call i64 @llvm.smax.i64(i64 %13, i64 -2147483648) + %13 = tail call i64 @llvm.smax.i64(i64 %12, i64 -2147483648) + %14 = tail call i64 @llvm.smin.i64(i64 %13, i64 2147483647) %15 = trunc nsw i64 %14 to i32 %16 = getelementptr inbounds nuw i32, ptr %0, i64 %.08 store i32 %15, ptr %16, align 4, !tbaa !3 @@ -15097,8 +15097,8 @@ ma_zero_memory_default.exit.i.i: ; preds = %23, %.lr.ph.i %37 = sext i16 %36 to i32 %38 = mul i32 %33, %37 %39 = ashr i32 %38, 16 - %40 = tail call i32 @llvm.smin.i32(i32 %39, i32 127) - %41 = tail call i32 @llvm.smax.i32(i32 %40, i32 -128) + %40 = tail call i32 @llvm.smax.i32(i32 %39, i32 -128) + %41 = tail call i32 @llvm.smin.i32(i32 %40, i32 127) %42 = trunc nsw i32 %41 to i8 %43 = xor i8 %42, -128 %44 = getelementptr inbounds nuw i8, ptr %0, i64 %.08.i @@ -15123,8 +15123,8 @@ ma_zero_memory_default.exit.i.i: ; preds = %23, %.lr.ph.i %52 = load i32, ptr %51, align 4, !tbaa !3 %53 = mul nsw i32 %52, %49 %54 = ashr i32 %53, 8 - %55 = tail call i32 @llvm.smin.i32(i32 %54, i32 32767) - %56 = tail call i32 @llvm.smax.i32(i32 %55, i32 -32768) + %55 = tail call i32 @llvm.smax.i32(i32 %54, i32 -32768) + %56 = tail call i32 @llvm.smin.i32(i32 %55, i32 32767) %57 = trunc nsw i32 %56 to i16 %58 = getelementptr inbounds nuw i16, ptr %0, i64 %.08.i37 store i16 %57, ptr %58, align 2, !tbaa !334 @@ -15148,8 +15148,8 @@ ma_zero_memory_default.exit.i.i: ; preds = %23, %.lr.ph.i %66 = load i64, ptr %65, align 8, !tbaa !63 %67 = mul nsw i64 %66, %63 %68 = ashr i64 %67, 8 - %69 = tail call i64 @llvm.smin.i64(i64 %68, i64 8388607) - %70 = tail call i64 @llvm.smax.i64(i64 %69, i64 -8388608) + %69 = tail call i64 @llvm.smax.i64(i64 %68, i64 -8388608) + %70 = tail call i64 @llvm.smin.i64(i64 %69, i64 8388607) %71 = trunc i64 %70 to i8 %72 = mul i64 %.015.i, 3 %73 = getelementptr inbounds nuw i8, ptr %0, i64 %72 @@ -15182,8 +15182,8 @@ ma_zero_memory_default.exit.i.i: ; preds = %23, %.lr.ph.i %87 = load i64, ptr %86, align 8, !tbaa !63 %88 = mul nsw i64 %87, %84 %89 = ashr i64 %88, 8 - %90 = tail call i64 @llvm.smin.i64(i64 %89, i64 2147483647) - %91 = tail call i64 @llvm.smax.i64(i64 %90, i64 -2147483648) + %90 = tail call i64 @llvm.smax.i64(i64 %89, i64 -2147483648) + %91 = tail call i64 @llvm.smin.i64(i64 %90, i64 2147483647) %92 = trunc nsw i64 %91 to i32 %93 = getelementptr inbounds nuw i32, ptr %0, i64 %.08.i44 store i32 %92, ptr %93, align 4, !tbaa !3 @@ -18544,8 +18544,8 @@ ma_biquad_process_pcm_frame_f32__direct_form_2_transposed.exit: ; preds = %34 %85 = mul nsw i32 %64, %77 %86 = mul nsw i32 %80, %66 %87 = sub nsw i32 %85, %86 - %88 = tail call i32 @llvm.smin.i32(i32 %80, i32 32767) - %89 = tail call i32 @llvm.smax.i32(i32 %88, i32 -32768) + %88 = tail call i32 @llvm.smax.i32(i32 %80, i32 -32768) + %89 = tail call i32 @llvm.smin.i32(i32 %88, i32 32767) %90 = trunc nsw i32 %89 to i16 %91 = getelementptr inbounds nuw i16, ptr %.03246, i64 %indvars.iv store i16 %90, ptr %91, align 2, !tbaa !334 @@ -21086,8 +21086,8 @@ ma_lpf1_process_pcm_frame_s16.exit.i: ; preds = %188 %237 = mul nsw i32 %212, %229 %238 = mul nsw i32 %232, %216 %239 = sub nsw i32 %237, %238 - %240 = tail call i32 @llvm.smin.i32(i32 %232, i32 32767) - %241 = tail call i32 @llvm.smax.i32(i32 %240, i32 -32768) + %240 = tail call i32 @llvm.smax.i32(i32 %232, i32 -32768) + %241 = tail call i32 @llvm.smin.i32(i32 %240, i32 32767) %242 = trunc nsw i32 %241 to i16 store i16 %242, ptr %227, align 2, !tbaa !334 store i32 %236, ptr %222, align 4, !tbaa !7 @@ -23494,8 +23494,8 @@ ma_hpf1_process_pcm_frame_s16.exit: ; preds = %189 %237 = mul nsw i32 %212, %229 %238 = mul nsw i32 %232, %216 %239 = sub nsw i32 %237, %238 - %240 = tail call i32 @llvm.smin.i32(i32 %232, i32 32767) - %241 = tail call i32 @llvm.smax.i32(i32 %240, i32 -32768) + %240 = tail call i32 @llvm.smax.i32(i32 %232, i32 -32768) + %241 = tail call i32 @llvm.smin.i32(i32 %240, i32 32767) %242 = trunc nsw i32 %241 to i16 store i16 %242, ptr %227, align 2, !tbaa !334 store i32 %236, ptr %222, align 4, !tbaa !7 @@ -24868,8 +24868,8 @@ ma_biquad_process_pcm_frame_f32.exit: ; preds = %52 %123 = mul nsw i32 %98, %115 %124 = mul nsw i32 %118, %102 %125 = sub nsw i32 %123, %124 - %126 = tail call i32 @llvm.smin.i32(i32 %118, i32 32767) - %127 = tail call i32 @llvm.smax.i32(i32 %126, i32 -32768) + %126 = tail call i32 @llvm.smax.i32(i32 %118, i32 -32768) + %127 = tail call i32 @llvm.smin.i32(i32 %126, i32 32767) %128 = trunc nsw i32 %127 to i16 store i16 %128, ptr %113, align 2, !tbaa !334 store i32 %122, ptr %108, align 4, !tbaa !7 @@ -34803,8 +34803,8 @@ ma_lpf1_process_pcm_frame_s16.exit.i.i.i: ; preds = %66 %115 = mul nsw i32 %90, %107 %116 = mul nsw i32 %110, %94 %117 = sub nsw i32 %115, %116 - %118 = tail call i32 @llvm.smin.i32(i32 %110, i32 32767) - %119 = tail call i32 @llvm.smax.i32(i32 %118, i32 -32768) + %118 = tail call i32 @llvm.smax.i32(i32 %110, i32 -32768) + %119 = tail call i32 @llvm.smin.i32(i32 %118, i32 32767) %120 = trunc nsw i32 %119 to i16 store i16 %120, ptr %105, align 2, !tbaa !334 store i32 %114, ptr %100, align 4, !tbaa !7 @@ -35143,8 +35143,8 @@ ma_lpf1_process_pcm_frame_s16.exit.i.i22.i: ; preds = %239 %288 = mul nsw i32 %263, %280 %289 = mul nsw i32 %283, %267 %290 = sub nsw i32 %288, %289 - %291 = tail call i32 @llvm.smin.i32(i32 %283, i32 32767) - %292 = tail call i32 @llvm.smax.i32(i32 %291, i32 -32768) + %291 = tail call i32 @llvm.smax.i32(i32 %283, i32 -32768) + %292 = tail call i32 @llvm.smin.i32(i32 %291, i32 32767) %293 = trunc nsw i32 %292 to i16 store i16 %293, ptr %278, align 2, !tbaa !334 store i32 %287, ptr %273, align 4, !tbaa !7 @@ -38945,8 +38945,8 @@ ma_zero_memory_default.exit.i: ; preds = %19, %.lr.ph %.094.lcssa.i = phi i32 [ 0, %.preheader.i ], [ %80, %75 ] %81 = load i32, ptr %70, align 8, !tbaa !942 %82 = udiv i32 %.094.lcssa.i, %81 - %83 = tail call i32 @llvm.smin.i32(i32 %82, i32 127) - %84 = tail call i32 @llvm.smax.i32(i32 %83, i32 -128) + %83 = tail call i32 @llvm.smax.i32(i32 %82, i32 -128) + %84 = tail call i32 @llvm.smin.i32(i32 %83, i32 127) %85 = trunc nsw i32 %84 to i8 %86 = xor i8 %85, -128 %87 = getelementptr inbounds nuw i8, ptr %1, i64 %.087149.i @@ -39475,8 +39475,8 @@ ma_zero_memory_64.exit.i: ; preds = %ma_zero_memory_64.e %298 = mul nsw i32 %297, %292 %299 = ashr i32 %298, 12 %300 = add nsw i32 %299, %290 - %301 = tail call i32 @llvm.smin.i32(i32 %300, i32 32767) - %302 = tail call i32 @llvm.smax.i32(i32 %301, i32 -32768) + %301 = tail call i32 @llvm.smax.i32(i32 %300, i32 -32768) + %302 = tail call i32 @llvm.smin.i32(i32 %301, i32 32767) %303 = trunc nsw i32 %302 to i16 store i16 %303, ptr %288, align 2, !tbaa !334 %indvars.iv.next307.i = add nuw nsw i64 %indvars.iv306.i, 1 @@ -39737,8 +39737,8 @@ ma_zero_memory_64.exit.i: ; preds = %ma_zero_memory_64.e %453 = mul nsw i64 %452, %446 %454 = ashr i64 %453, 12 %455 = add nsw i64 %454, %439 - %456 = tail call i64 @llvm.smin.i64(i64 %455, i64 2147483647) - %457 = tail call i64 @llvm.smax.i64(i64 %456, i64 -2147483648) + %456 = tail call i64 @llvm.smax.i64(i64 %455, i64 -2147483648) + %457 = tail call i64 @llvm.smin.i64(i64 %456, i64 2147483647) %458 = trunc nsw i64 %457 to i32 store i32 %458, ptr %437, align 4, !tbaa !3 %indvars.iv.next295.i = add nuw nsw i64 %indvars.iv294.i, 1 @@ -94152,8 +94152,8 @@ define internal fastcc i64 @ma_dr_wav_read_pcm_frames_s16__msadpcm(ptr noundef n %146 = load i32, ptr %21, align 8, !tbaa !3 %147 = mul nsw i32 %146, %.0163 %148 = add nsw i32 %145, %147 - %149 = call i32 @llvm.smin.i32(i32 %148, i32 32767) - %150 = call i32 @llvm.smax.i32(i32 %149, i32 -32768) + %149 = call i32 @llvm.smax.i32(i32 %148, i32 -32768) + %150 = call i32 @llvm.smin.i32(i32 %149, i32 32767) %151 = zext nneg i8 %125 to i64 %152 = getelementptr inbounds nuw [16 x i32], ptr @ma_dr_wav_read_pcm_frames_s16__msadpcm.adaptationTable, i64 0, i64 %151 %153 = load i32, ptr %152, align 4, !tbaa !3 @@ -94169,8 +94169,8 @@ define internal fastcc i64 @ma_dr_wav_read_pcm_frames_s16__msadpcm(ptr noundef n %160 = ashr i32 %159, 8 %161 = mul nsw i32 %spec.select, %.0 %162 = add nsw i32 %160, %161 - %163 = call i32 @llvm.smin.i32(i32 %162, i32 32767) - %164 = call i32 @llvm.smax.i32(i32 %163, i32 -32768) + %163 = call i32 @llvm.smax.i32(i32 %162, i32 -32768) + %164 = call i32 @llvm.smin.i32(i32 %163, i32 32767) %165 = and i8 %124, 15 %166 = zext nneg i8 %165 to i64 %167 = getelementptr inbounds nuw [16 x i32], ptr @ma_dr_wav_read_pcm_frames_s16__msadpcm.adaptationTable, i64 0, i64 %166 @@ -94202,8 +94202,8 @@ define internal fastcc i64 @ma_dr_wav_read_pcm_frames_s16__msadpcm(ptr noundef n %184 = load i32, ptr %23, align 4, !tbaa !3 %185 = mul nsw i32 %184, %.0 %186 = add nsw i32 %183, %185 - %187 = call i32 @llvm.smin.i32(i32 %186, i32 32767) - %188 = call i32 @llvm.smax.i32(i32 %187, i32 -32768) + %187 = call i32 @llvm.smax.i32(i32 %186, i32 -32768) + %188 = call i32 @llvm.smin.i32(i32 %187, i32 32767) %189 = and i8 %124, 15 %190 = zext nneg i8 %189 to i64 %191 = getelementptr inbounds nuw [16 x i32], ptr @ma_dr_wav_read_pcm_frames_s16__msadpcm.adaptationTable, i64 0, i64 %190 @@ -94462,7 +94462,7 @@ define internal fastcc i64 @ma_dr_wav_read_pcm_frames_s16__ima(ptr noundef nonnu br i1 %.not298, label %.backedge, label %.lr.ph290 .lr.ph290: ; preds = %111, %192 - %indvars.iv326 = phi i64 [ %indvars.iv.next327, %192 ], [ 0, %111 ] + %indvars.iv326 = phi i64 [ %indvars.iv.next327, %195 ], [ 0, %111 ] call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %6) #66 %113 = load ptr, ptr %0, align 8, !tbaa !1141 %114 = load ptr, ptr %15, align 8, !tbaa !1143 @@ -94518,15 +94518,15 @@ define internal fastcc i64 @ma_dr_wav_read_pcm_frames_s16__ima(ptr noundef nonnu %144 = sub nsw i32 0, %.2 %.3 = select i1 %.not217, i32 %.2, i32 %144 %145 = add nsw i32 %.3, %133 - %146 = call i32 @llvm.smin.i32(i32 %145, i32 32767) - %147 = call i32 @llvm.smax.i32(i32 %146, i32 -32768) + %146 = call i32 @llvm.smax.i32(i32 %145, i32 -32768) + %147 = call i32 @llvm.smin.i32(i32 %146, i32 32767) store i32 %147, ptr %120, align 4, !tbaa !3 %148 = zext nneg i8 %127 to i64 %149 = getelementptr inbounds nuw [16 x i32], ptr @ma_dr_wav_read_pcm_frames_s16__ima.indexTable, i64 0, i64 %148 %150 = load i32, ptr %149, align 4, !tbaa !3 %151 = add nsw i32 %150, %129 - %spec.select = call i32 @llvm.smin.i32(i32 %151, i32 88) - %spec.select233 = call i32 @llvm.smax.i32(i32 %spec.select, i32 0) + %spec.select = call i32 @llvm.smax.i32(i32 %151, i32 0) + %spec.select233 = call i32 @llvm.umin.i32(i32 %spec.select, i32 88) store i32 %spec.select233, ptr %119, align 4, !tbaa !3 %152 = load i32, ptr %13, align 4, !tbaa !1894 %153 = mul i32 %152, %122 @@ -94562,15 +94562,15 @@ define internal fastcc i64 @ma_dr_wav_read_pcm_frames_s16__ima(ptr noundef nonnu %.not221234 = icmp slt i8 %126, 0 %.7 = select i1 %.not221234, i32 %175, i32 %.6 %176 = add nsw i32 %.7, %164 - %177 = call i32 @llvm.smin.i32(i32 %176, i32 32767) - %178 = call i32 @llvm.smax.i32(i32 %177, i32 -32768) + %177 = call i32 @llvm.smax.i32(i32 %176, i32 -32768) + %178 = call i32 @llvm.smin.i32(i32 %177, i32 32767) store i32 %178, ptr %120, align 4, !tbaa !3 %179 = zext nneg i8 %128 to i64 %180 = getelementptr inbounds nuw [16 x i32], ptr @ma_dr_wav_read_pcm_frames_s16__ima.indexTable, i64 0, i64 %179 %181 = load i32, ptr %180, align 4, !tbaa !3 %182 = add nsw i32 %181, %160 - %spec.select224 = call i32 @llvm.smin.i32(i32 %182, i32 88) - %183 = call i32 @llvm.smax.i32(i32 %spec.select224, i32 0) + %spec.select224 = call i32 @llvm.smax.i32(i32 %182, i32 0) + %183 = call i32 @llvm.umin.i32(i32 %spec.select224, i32 88) store i32 %183, ptr %119, align 4, !tbaa !3 %184 = load i32, ptr %13, align 4, !tbaa !1894 %185 = mul i32 %184, %122 @@ -94592,8 +94592,8 @@ define internal fastcc i64 @ma_dr_wav_read_pcm_frames_s16__ima(ptr noundef nonnu br i1 %193, label %.lr.ph290, label %.backedge, !llvm.loop !1899 .backedge: ; preds = %86, %192, %111, %108 - %.1189281310 = phi i64 [ %82, %111 ], [ %82, %108 ], [ %82, %192 ], [ %.1189281, %86 ] - %.2198280307 = phi i64 [ %83, %111 ], [ %83, %108 ], [ %83, %192 ], [ %.2198280, %86 ] + %.1189281310 = phi i64 [ %82, %111 ], [ %82, %108 ], [ %82, %195 ], [ %.1189281, %86 ] + %.2198280307 = phi i64 [ %83, %111 ], [ %83, %108 ], [ %83, %195 ], [ %.2198280, %86 ] %194 = load i64, ptr %7, align 8, !tbaa !1195 %195 = load i64, ptr %8, align 8, !tbaa !1194 %196 = icmp ult i64 %194, %195 @@ -149371,10 +149371,10 @@ declare double @llvm.fabs.f64(double) #74 declare i64 @llvm.abs.i64(i64, i1 immarg) #74 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i16 @llvm.smin.i16(i16, i16) #74 +declare i16 @llvm.smax.i16(i16, i16) #74 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i16 @llvm.smax.i16(i16, i16) #74 +declare i16 @llvm.smin.i16(i16, i16) #74 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.sqrt.f32(float) #74 diff --git a/bench/nuklear/optimized/unity.ll b/bench/nuklear/optimized/unity.ll index 6c56d1cb032..25fc9395f98 100644 --- a/bench/nuklear/optimized/unity.ll +++ b/bench/nuklear/optimized/unity.ll @@ -1168,14 +1168,14 @@ define i32 @nk_rgb_factor(i32 %0, float noundef %1) local_unnamed_addr #0 { ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable define i32 @nk_rgba(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) local_unnamed_addr #0 { - %5 = tail call i32 @llvm.smin.i32(i32 %0, i32 255) - %6 = tail call i32 @llvm.smax.i32(i32 %5, i32 0) - %7 = tail call i32 @llvm.smin.i32(i32 %1, i32 255) - %8 = tail call i32 @llvm.smax.i32(i32 %7, i32 0) - %9 = tail call i32 @llvm.smin.i32(i32 %2, i32 255) - %10 = tail call i32 @llvm.smax.i32(i32 %9, i32 0) - %11 = tail call i32 @llvm.smin.i32(i32 %3, i32 255) - %12 = tail call i32 @llvm.smax.i32(i32 %11, i32 0) + %5 = tail call i32 @llvm.smax.i32(i32 %0, i32 0) + %6 = tail call i32 @llvm.umin.i32(i32 %5, i32 255) + %7 = tail call i32 @llvm.smax.i32(i32 %1, i32 0) + %8 = tail call i32 @llvm.umin.i32(i32 %7, i32 255) + %9 = tail call i32 @llvm.smax.i32(i32 %2, i32 0) + %10 = tail call i32 @llvm.umin.i32(i32 %9, i32 255) + %11 = tail call i32 @llvm.smax.i32(i32 %3, i32 0) + %12 = tail call i32 @llvm.umin.i32(i32 %11, i32 255) %.sroa.4.0.insert.shift = shl nuw i32 %12, 24 %.sroa.3.0.insert.shift = shl nuw nsw i32 %10, 16 %.sroa.3.0.insert.insert = or disjoint i32 %.sroa.4.0.insert.shift, %.sroa.3.0.insert.shift @@ -1634,14 +1634,14 @@ define i32 @nk_rgba_iv(ptr noundef readonly captures(none) %0) local_unnamed_add %6 = load i32, ptr %5, align 4, !tbaa !7 %7 = getelementptr inbounds nuw i8, ptr %0, i64 12 %8 = load i32, ptr %7, align 4, !tbaa !7 - %9 = tail call i32 @llvm.smin.i32(i32 %2, i32 255) - %10 = tail call i32 @llvm.smax.i32(i32 %9, i32 0) - %11 = tail call i32 @llvm.smin.i32(i32 %4, i32 255) - %12 = tail call i32 @llvm.smax.i32(i32 %11, i32 0) - %13 = tail call i32 @llvm.smin.i32(i32 %6, i32 255) - %14 = tail call i32 @llvm.smax.i32(i32 %13, i32 0) - %15 = tail call i32 @llvm.smin.i32(i32 %8, i32 255) - %16 = tail call i32 @llvm.smax.i32(i32 %15, i32 0) + %9 = tail call i32 @llvm.smax.i32(i32 %2, i32 0) + %10 = tail call i32 @llvm.umin.i32(i32 %9, i32 255) + %11 = tail call i32 @llvm.smax.i32(i32 %4, i32 0) + %12 = tail call i32 @llvm.umin.i32(i32 %11, i32 255) + %13 = tail call i32 @llvm.smax.i32(i32 %6, i32 0) + %14 = tail call i32 @llvm.umin.i32(i32 %13, i32 255) + %15 = tail call i32 @llvm.smax.i32(i32 %8, i32 0) + %16 = tail call i32 @llvm.umin.i32(i32 %15, i32 255) %.sroa.4.0.insert.shift.i = shl nuw i32 %16, 24 %.sroa.3.0.insert.shift.i = shl nuw nsw i32 %14, 16 %.sroa.3.0.insert.insert.i = or disjoint i32 %.sroa.4.0.insert.shift.i, %.sroa.3.0.insert.shift.i @@ -1659,12 +1659,12 @@ define i32 @nk_rgba_bv(ptr noundef readonly captures(none) %0) local_unnamed_add ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable define range(i32 -16777216, 0) i32 @nk_rgb(i32 noundef %0, i32 noundef %1, i32 noundef %2) local_unnamed_addr #0 { - %4 = tail call i32 @llvm.smin.i32(i32 %0, i32 255) - %5 = tail call i32 @llvm.smax.i32(i32 %4, i32 0) - %6 = tail call i32 @llvm.smin.i32(i32 %1, i32 255) - %7 = tail call i32 @llvm.smax.i32(i32 %6, i32 0) - %8 = tail call i32 @llvm.smin.i32(i32 %2, i32 255) - %9 = tail call i32 @llvm.smax.i32(i32 %8, i32 0) + %4 = tail call i32 @llvm.smax.i32(i32 %0, i32 0) + %5 = tail call i32 @llvm.umin.i32(i32 %4, i32 255) + %6 = tail call i32 @llvm.smax.i32(i32 %1, i32 0) + %7 = tail call i32 @llvm.umin.i32(i32 %6, i32 255) + %8 = tail call i32 @llvm.smax.i32(i32 %2, i32 0) + %9 = tail call i32 @llvm.umin.i32(i32 %8, i32 255) %.sroa.3.0.insert.shift = shl nuw nsw i32 %9, 16 %.sroa.2.0.insert.shift = shl nuw nsw i32 %7, 8 %.sroa.3.0.insert.insert = or disjoint i32 %.sroa.3.0.insert.shift, %.sroa.2.0.insert.shift @@ -1680,12 +1680,12 @@ define range(i32 -16777216, 0) i32 @nk_rgb_iv(ptr noundef readonly captures(none %4 = load i32, ptr %3, align 4, !tbaa !7 %5 = getelementptr inbounds nuw i8, ptr %0, i64 8 %6 = load i32, ptr %5, align 4, !tbaa !7 - %7 = tail call i32 @llvm.smin.i32(i32 %2, i32 255) - %8 = tail call i32 @llvm.smax.i32(i32 %7, i32 0) - %9 = tail call i32 @llvm.smin.i32(i32 %4, i32 255) - %10 = tail call i32 @llvm.smax.i32(i32 %9, i32 0) - %11 = tail call i32 @llvm.smin.i32(i32 %6, i32 255) - %12 = tail call i32 @llvm.smax.i32(i32 %11, i32 0) + %7 = tail call i32 @llvm.smax.i32(i32 %2, i32 0) + %8 = tail call i32 @llvm.umin.i32(i32 %7, i32 255) + %9 = tail call i32 @llvm.smax.i32(i32 %4, i32 0) + %10 = tail call i32 @llvm.umin.i32(i32 %9, i32 255) + %11 = tail call i32 @llvm.smax.i32(i32 %6, i32 0) + %12 = tail call i32 @llvm.umin.i32(i32 %11, i32 255) %.sroa.3.0.insert.shift.i = shl nuw nsw i32 %12, 16 %.sroa.2.0.insert.shift.i = shl nuw nsw i32 %10, 8 %.sroa.3.0.insert.insert.i = or disjoint i32 %.sroa.3.0.insert.shift.i, %.sroa.2.0.insert.shift.i @@ -1961,12 +1961,12 @@ nk_rgb_f.exit: ; preds = %2, %6 ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable define range(i32 -16777216, 0) i32 @nk_hsv(i32 noundef %0, i32 noundef %1, i32 noundef %2) local_unnamed_addr #2 { - %4 = tail call i32 @llvm.smin.i32(i32 %1, i32 255) - %5 = tail call i32 @llvm.smax.i32(i32 %4, i32 0) + %4 = tail call i32 @llvm.smax.i32(i32 %1, i32 0) + %5 = tail call i32 @llvm.umin.i32(i32 %4, i32 255) %6 = uitofp nneg i32 %5 to float %7 = fdiv float %6, 2.550000e+02 - %8 = tail call i32 @llvm.smin.i32(i32 %2, i32 255) - %9 = tail call i32 @llvm.smax.i32(i32 %8, i32 0) + %8 = tail call i32 @llvm.smax.i32(i32 %2, i32 0) + %9 = tail call i32 @llvm.umin.i32(i32 %8, i32 255) %10 = uitofp nneg i32 %9 to float %11 = fdiv float %10, 2.550000e+02 %12 = fcmp ugt float %7, 0.000000e+00 @@ -1978,8 +1978,8 @@ define range(i32 -16777216, 0) i32 @nk_hsv(i32 noundef %0, i32 noundef %1, i32 n br label %nk_hsva.exit 14: ; preds = %3 - %15 = tail call i32 @llvm.smin.i32(i32 %0, i32 255) - %16 = tail call i32 @llvm.smax.i32(i32 %15, i32 0) + %15 = tail call i32 @llvm.smax.i32(i32 %0, i32 0) + %16 = tail call i32 @llvm.umin.i32(i32 %15, i32 255) %17 = uitofp nneg i32 %16 to float %18 = fdiv float %17, 2.550000e+02 %19 = fdiv float %18, 0x3FC5555560000000 @@ -2063,12 +2063,12 @@ nk_hsva.exit: ; preds = %13, %36 ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable define i32 @nk_hsva(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) local_unnamed_addr #2 { - %5 = tail call i32 @llvm.smin.i32(i32 %1, i32 255) - %6 = tail call i32 @llvm.smax.i32(i32 %5, i32 0) + %5 = tail call i32 @llvm.smax.i32(i32 %1, i32 0) + %6 = tail call i32 @llvm.umin.i32(i32 %5, i32 255) %7 = uitofp nneg i32 %6 to float %8 = fdiv float %7, 2.550000e+02 - %9 = tail call i32 @llvm.smin.i32(i32 %2, i32 255) - %10 = tail call i32 @llvm.smax.i32(i32 %9, i32 0) + %9 = tail call i32 @llvm.smax.i32(i32 %2, i32 0) + %10 = tail call i32 @llvm.umin.i32(i32 %9, i32 255) %11 = uitofp nneg i32 %10 to float %12 = fdiv float %11, 2.550000e+02 %13 = fcmp ugt float %8, 0.000000e+00 @@ -2080,8 +2080,8 @@ define i32 @nk_hsva(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef br label %nk_hsva_f.exit 15: ; preds = %4 - %16 = tail call i32 @llvm.smin.i32(i32 %0, i32 255) - %17 = tail call i32 @llvm.smax.i32(i32 %16, i32 0) + %16 = tail call i32 @llvm.smax.i32(i32 %0, i32 0) + %17 = tail call i32 @llvm.umin.i32(i32 %16, i32 255) %18 = uitofp nneg i32 %17 to float %19 = fdiv float %18, 2.550000e+02 %20 = fdiv float %19, 0x3FC5555560000000 @@ -2131,8 +2131,8 @@ define i32 @nk_hsva(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef nk_hsva_f.exit: ; preds = %14, %37 %.sroa.16.8.vec.insert73.sink.i.i = phi <2 x float> [ %.sroa.16.8.vec.insert73.i.i, %37 ], [ %.sroa.0.0.vec.insert.i.i, %14 ] %.sroa.0.0.i.i = phi <2 x float> [ %.sroa.0.4.vec.insert60.i.i, %37 ], [ %.sroa.0.4.vec.insert.i.i, %14 ] - %38 = tail call i32 @llvm.smin.i32(i32 %3, i32 255) - %39 = tail call i32 @llvm.smax.i32(i32 %38, i32 0) + %38 = tail call i32 @llvm.smax.i32(i32 %3, i32 0) + %39 = tail call i32 @llvm.umin.i32(i32 %38, i32 255) %40 = uitofp nneg i32 %39 to float %41 = fdiv float %40, 2.550000e+02 %.sroa.0.0.vec.extract.i = extractelement <2 x float> %.sroa.0.0.i.i, i64 0 @@ -2181,12 +2181,12 @@ define range(i32 -16777216, 0) i32 @nk_hsv_iv(ptr noundef readonly captures(none %3 = load i32, ptr %2, align 4, !tbaa !7 %4 = getelementptr inbounds nuw i8, ptr %0, i64 8 %5 = load i32, ptr %4, align 4, !tbaa !7 - %6 = tail call i32 @llvm.smin.i32(i32 %3, i32 255) - %7 = tail call i32 @llvm.smax.i32(i32 %6, i32 0) + %6 = tail call i32 @llvm.smax.i32(i32 %3, i32 0) + %7 = tail call i32 @llvm.umin.i32(i32 %6, i32 255) %8 = uitofp nneg i32 %7 to float %9 = fdiv float %8, 2.550000e+02 - %10 = tail call i32 @llvm.smin.i32(i32 %5, i32 255) - %11 = tail call i32 @llvm.smax.i32(i32 %10, i32 0) + %10 = tail call i32 @llvm.smax.i32(i32 %5, i32 0) + %11 = tail call i32 @llvm.umin.i32(i32 %10, i32 255) %12 = uitofp nneg i32 %11 to float %13 = fdiv float %12, 2.550000e+02 %14 = fcmp ugt float %9, 0.000000e+00 @@ -2199,8 +2199,8 @@ define range(i32 -16777216, 0) i32 @nk_hsv_iv(ptr noundef readonly captures(none 16: ; preds = %1 %17 = load i32, ptr %0, align 4, !tbaa !7 - %18 = tail call i32 @llvm.smin.i32(i32 %17, i32 255) - %19 = tail call i32 @llvm.smax.i32(i32 %18, i32 0) + %18 = tail call i32 @llvm.smax.i32(i32 %17, i32 0) + %19 = tail call i32 @llvm.umin.i32(i32 %18, i32 255) %20 = uitofp nneg i32 %19 to float %21 = fdiv float %20, 2.550000e+02 %22 = fdiv float %21, 0x3FC5555560000000 @@ -2674,12 +2674,12 @@ define i32 @nk_hsva_iv(ptr noundef readonly captures(none) %0) local_unnamed_add %5 = load i32, ptr %4, align 4, !tbaa !7 %6 = getelementptr inbounds nuw i8, ptr %0, i64 12 %7 = load i32, ptr %6, align 4, !tbaa !7 - %8 = tail call i32 @llvm.smin.i32(i32 %3, i32 255) - %9 = tail call i32 @llvm.smax.i32(i32 %8, i32 0) + %8 = tail call i32 @llvm.smax.i32(i32 %3, i32 0) + %9 = tail call i32 @llvm.umin.i32(i32 %8, i32 255) %10 = uitofp nneg i32 %9 to float %11 = fdiv float %10, 2.550000e+02 - %12 = tail call i32 @llvm.smin.i32(i32 %5, i32 255) - %13 = tail call i32 @llvm.smax.i32(i32 %12, i32 0) + %12 = tail call i32 @llvm.smax.i32(i32 %5, i32 0) + %13 = tail call i32 @llvm.umin.i32(i32 %12, i32 255) %14 = uitofp nneg i32 %13 to float %15 = fdiv float %14, 2.550000e+02 %16 = fcmp ugt float %11, 0.000000e+00 @@ -2692,8 +2692,8 @@ define i32 @nk_hsva_iv(ptr noundef readonly captures(none) %0) local_unnamed_add 18: ; preds = %1 %19 = load i32, ptr %0, align 4, !tbaa !7 - %20 = tail call i32 @llvm.smin.i32(i32 %19, i32 255) - %21 = tail call i32 @llvm.smax.i32(i32 %20, i32 0) + %20 = tail call i32 @llvm.smax.i32(i32 %19, i32 0) + %21 = tail call i32 @llvm.umin.i32(i32 %20, i32 255) %22 = uitofp nneg i32 %21 to float %23 = fdiv float %22, 2.550000e+02 %24 = fdiv float %23, 0x3FC5555560000000 @@ -2743,8 +2743,8 @@ define i32 @nk_hsva_iv(ptr noundef readonly captures(none) %0) local_unnamed_add nk_hsva.exit: ; preds = %17, %41 %.sroa.16.8.vec.insert73.sink.i.i.i = phi <2 x float> [ %.sroa.16.8.vec.insert73.i.i.i, %41 ], [ %.sroa.0.0.vec.insert.i.i.i, %17 ] %.sroa.0.0.i.i.i = phi <2 x float> [ %.sroa.0.4.vec.insert60.i.i.i, %41 ], [ %.sroa.0.4.vec.insert.i.i.i, %17 ] - %42 = tail call i32 @llvm.smin.i32(i32 %7, i32 255) - %43 = tail call i32 @llvm.smax.i32(i32 %42, i32 0) + %42 = tail call i32 @llvm.smax.i32(i32 %7, i32 0) + %43 = tail call i32 @llvm.umin.i32(i32 %42, i32 255) %44 = uitofp nneg i32 %43 to float %45 = fdiv float %44, 2.550000e+02 %.sroa.0.0.vec.extract.i.i = extractelement <2 x float> %.sroa.0.0.i.i.i, i64 0 diff --git a/bench/oiio/optimized/sysutil.ll b/bench/oiio/optimized/sysutil.ll index 115d7ef5991..1a02241374f 100644 --- a/bench/oiio/optimized/sysutil.ll +++ b/bench/oiio/optimized/sysutil.ll @@ -1266,12 +1266,12 @@ define void @_ZN11OpenImageIO6v3_1_07Sysutil4Term12ansi_fgcolorB5cxx11Eiii(ptr d br i1 %11, label %12, label %65 12: ; preds = %5 - %.sroa.speculated43 = tail call i32 @llvm.smin.i32(i32 %2, i32 255) - %.sroa.speculated22 = tail call i32 @llvm.smax.i32(i32 %.sroa.speculated43, i32 0) - %.sroa.speculated35 = tail call i32 @llvm.smin.i32(i32 %3, i32 255) - %.sroa.speculated16 = tail call i32 @llvm.smax.i32(i32 %.sroa.speculated35, i32 0) - %.sroa.speculated27 = tail call i32 @llvm.smin.i32(i32 %4, i32 255) - %.sroa.speculated = tail call i32 @llvm.smax.i32(i32 %.sroa.speculated27, i32 0) + %.sroa.speculated43 = tail call i32 @llvm.smax.i32(i32 %2, i32 0) + %.sroa.speculated22 = tail call i32 @llvm.umin.i32(i32 %.sroa.speculated43, i32 255) + %.sroa.speculated35 = tail call i32 @llvm.smax.i32(i32 %3, i32 0) + %.sroa.speculated16 = tail call i32 @llvm.umin.i32(i32 %.sroa.speculated35, i32 255) + %.sroa.speculated27 = tail call i32 @llvm.smax.i32(i32 %4, i32 0) + %.sroa.speculated = tail call i32 @llvm.umin.i32(i32 %.sroa.speculated27, i32 255) call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %7) #32 call void @llvm.lifetime.start.p0(i64 48, ptr nonnull %6) #32, !noalias !40 %.sroa.024.0.insert.ext.i.i = zext nneg i32 %.sroa.speculated22 to i64 @@ -1283,7 +1283,7 @@ define void @_ZN11OpenImageIO6v3_1_07Sysutil4Term12ansi_fgcolorB5cxx11Eiii(ptr d %14 = getelementptr inbounds nuw i8, ptr %6, i64 32 store i64 %.sroa.018.0.insert.ext.i.i, ptr %14, align 16, !alias.scope !43, !noalias !40 invoke void @_ZN3fmt2v87vformatB5cxx11ENS0_17basic_string_viewIcEENS0_17basic_format_argsINS0_20basic_format_contextINS0_8appenderEcEEEE(ptr dead_on_unwind nonnull writable sret(%"class.std::__cxx11::basic_string") align 8 %7, ptr nonnull @.str.86, i64 16, i64 273, ptr nonnull %6) - to label %15 unwind label %57 + to label %21 unwind label %57 15: ; preds = %12 call void @llvm.lifetime.end.p0(i64 48, ptr nonnull %6) #32, !noalias !40 @@ -1316,7 +1316,7 @@ _ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.threa br i1 %.not22.i, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEOS4_.exit, label %31, !prof !46 31: ; preds = %26 - switch i64 %29, label %34 [ + switch i64 %29, label %40 [ i64 0, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7_S_copyEPcPKcm.exit.i i64 1, label %32 ] @@ -1370,7 +1370,7 @@ _ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit24.thr br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEOS4_.exit _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEOS4_.exit: ; preds = %26, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7_S_copyEPcPKcm.exit.i, %45, %46 - %48 = phi ptr [ %.pre.i, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7_S_copyEPcPKcm.exit.i ], [ %16, %45 ], [ %47, %46 ], [ %27, %26 ] + %48 = phi ptr [ %.pre.i, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7_S_copyEPcPKcm.exit.i ], [ %16, %51 ], [ %47, %52 ], [ %27, %32 ] %49 = getelementptr inbounds nuw i8, ptr %7, i64 8 store i64 0, ptr %49, align 8, !tbaa !19 store i8 0, ptr %48, align 1, !tbaa !13 @@ -1436,12 +1436,12 @@ define void @_ZN11OpenImageIO6v3_1_07Sysutil4Term12ansi_bgcolorB5cxx11Eiii(ptr d br i1 %11, label %12, label %65 12: ; preds = %5 - %.sroa.speculated43 = tail call i32 @llvm.smin.i32(i32 %2, i32 255) - %.sroa.speculated22 = tail call i32 @llvm.smax.i32(i32 %.sroa.speculated43, i32 0) - %.sroa.speculated35 = tail call i32 @llvm.smin.i32(i32 %3, i32 255) - %.sroa.speculated16 = tail call i32 @llvm.smax.i32(i32 %.sroa.speculated35, i32 0) - %.sroa.speculated27 = tail call i32 @llvm.smin.i32(i32 %4, i32 255) - %.sroa.speculated = tail call i32 @llvm.smax.i32(i32 %.sroa.speculated27, i32 0) + %.sroa.speculated43 = tail call i32 @llvm.smax.i32(i32 %2, i32 0) + %.sroa.speculated22 = tail call i32 @llvm.umin.i32(i32 %.sroa.speculated43, i32 255) + %.sroa.speculated35 = tail call i32 @llvm.smax.i32(i32 %3, i32 0) + %.sroa.speculated16 = tail call i32 @llvm.umin.i32(i32 %.sroa.speculated35, i32 255) + %.sroa.speculated27 = tail call i32 @llvm.smax.i32(i32 %4, i32 0) + %.sroa.speculated = tail call i32 @llvm.umin.i32(i32 %.sroa.speculated27, i32 255) call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %7) #32 call void @llvm.lifetime.start.p0(i64 48, ptr nonnull %6) #32, !noalias !47 %.sroa.024.0.insert.ext.i.i = zext nneg i32 %.sroa.speculated22 to i64 @@ -1453,7 +1453,7 @@ define void @_ZN11OpenImageIO6v3_1_07Sysutil4Term12ansi_bgcolorB5cxx11Eiii(ptr d %14 = getelementptr inbounds nuw i8, ptr %6, i64 32 store i64 %.sroa.018.0.insert.ext.i.i, ptr %14, align 16, !alias.scope !50, !noalias !47 invoke void @_ZN3fmt2v87vformatB5cxx11ENS0_17basic_string_viewIcEENS0_17basic_format_argsINS0_20basic_format_contextINS0_8appenderEcEEEE(ptr dead_on_unwind nonnull writable sret(%"class.std::__cxx11::basic_string") align 8 %7, ptr nonnull @.str.87, i64 16, i64 273, ptr nonnull %6) - to label %15 unwind label %57 + to label %21 unwind label %57 15: ; preds = %12 call void @llvm.lifetime.end.p0(i64 48, ptr nonnull %6) #32, !noalias !47 @@ -1486,7 +1486,7 @@ _ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.threa br i1 %.not22.i, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEOS4_.exit, label %31, !prof !46 31: ; preds = %26 - switch i64 %29, label %34 [ + switch i64 %29, label %40 [ i64 0, label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7_S_copyEPcPKcm.exit.i i64 1, label %32 ] @@ -1540,7 +1540,7 @@ _ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit24.thr br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEOS4_.exit _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEaSEOS4_.exit: ; preds = %26, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7_S_copyEPcPKcm.exit.i, %45, %46 - %48 = phi ptr [ %.pre.i, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7_S_copyEPcPKcm.exit.i ], [ %16, %45 ], [ %47, %46 ], [ %27, %26 ] + %48 = phi ptr [ %.pre.i, %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7_S_copyEPcPKcm.exit.i ], [ %16, %51 ], [ %47, %52 ], [ %27, %32 ] %49 = getelementptr inbounds nuw i8, ptr %7, i64 8 store i64 0, ptr %49, align 8, !tbaa !19 store i8 0, ptr %48, align 1, !tbaa !13 diff --git a/bench/opencv/optimized/fast_gemm.ll b/bench/opencv/optimized/fast_gemm.ll index df387e1350f..92580a0ab01 100644 --- a/bench/opencv/optimized/fast_gemm.ll +++ b/bench/opencv/optimized/fast_gemm.ll @@ -1459,8 +1459,8 @@ _ZN2cv3dnn12cpu_baselineL20fast_gemm_pack12_f32EiiPKviiPv.exit.thread: ; preds = .preheader70.us.i: ; preds = %._crit_edge.us.i, %.preheader70.lr.ph.i %indvars.iv28 = phi i32 [ %indvars.iv.next29, %._crit_edge.us.i ], [ %., %.preheader70.lr.ph.i ] %indvars.iv91.i = phi i64 [ %indvars.iv.next92.i, %._crit_edge.us.i ], [ 0, %.preheader70.lr.ph.i ] - %332 = tail call i32 @llvm.smin.i32(i32 %indvars.iv28, i32 8) - %333 = tail call i32 @llvm.smax.i32(i32 %332, i32 1) + %332 = tail call i32 @llvm.smax.i32(i32 %indvars.iv28, i32 1) + %333 = tail call i32 @llvm.umin.i32(i32 %332, i32 8) %smax32 = zext nneg i32 %333 to i64 %334 = trunc nuw nsw i64 %indvars.iv91.i to i32 %factor.op.mul.reass.us.i = mul i32 %factor.op.mul76.i, %334 @@ -2246,8 +2246,8 @@ _ZN2cv3dnn12cpu_baselineL19fast_gemm_pack8_f32EiiPKviiPv.exit: ; preds = %.loope .preheader70.us.i: ; preds = %._crit_edge.us.i, %.preheader70.us.preheader.i %indvars.iv26 = phi i32 [ %indvars.iv.next27, %._crit_edge.us.i ], [ %., %.preheader70.us.preheader.i ] %indvars.iv91.i = phi i64 [ %indvars.iv.next92.i, %._crit_edge.us.i ], [ 0, %.preheader70.us.preheader.i ] - %219 = tail call i32 @llvm.smin.i32(i32 %indvars.iv26, i32 8) - %220 = tail call i32 @llvm.smax.i32(i32 %219, i32 1) + %219 = tail call i32 @llvm.smax.i32(i32 %indvars.iv26, i32 1) + %220 = tail call i32 @llvm.umin.i32(i32 %219, i32 8) %smax30 = zext nneg i32 %220 to i64 %221 = trunc nuw nsw i64 %indvars.iv91.i to i32 %factor.op.mul.reass.us.i = mul i32 %factor.op.mul76.i, %221 @@ -6366,8 +6366,8 @@ _ZN2cv3dnn12cpu_baselineL20fast_gemm_pack12_f32EiiPKviiPv.exit.thread: ; preds = .preheader70.us.i: ; preds = %._crit_edge.us.i, %.preheader70.lr.ph.i %indvars.iv28 = phi i32 [ %indvars.iv.next29, %._crit_edge.us.i ], [ %., %.preheader70.lr.ph.i ] %indvars.iv91.i = phi i64 [ %indvars.iv.next92.i, %._crit_edge.us.i ], [ 0, %.preheader70.lr.ph.i ] - %360 = tail call i32 @llvm.smin.i32(i32 %indvars.iv28, i32 8) - %361 = tail call i32 @llvm.smax.i32(i32 %360, i32 1) + %360 = tail call i32 @llvm.smax.i32(i32 %indvars.iv28, i32 1) + %361 = tail call i32 @llvm.umin.i32(i32 %360, i32 8) %smax32 = zext nneg i32 %361 to i64 %362 = trunc nuw nsw i64 %indvars.iv91.i to i32 %factor.op.mul.reass.us.i = mul i32 %factor.op.mul76.i, %362 @@ -7027,8 +7027,8 @@ _ZN2cv3dnn12cpu_baselineL19fast_gemm_pack8_f32EiiPKviiPv.exit: ; preds = %.loope .preheader70.us.i: ; preds = %._crit_edge.us.i, %.preheader70.us.preheader.i %indvars.iv26 = phi i32 [ %indvars.iv.next27, %._crit_edge.us.i ], [ %., %.preheader70.us.preheader.i ] %indvars.iv91.i = phi i64 [ %indvars.iv.next92.i, %._crit_edge.us.i ], [ 0, %.preheader70.us.preheader.i ] - %247 = tail call i32 @llvm.smin.i32(i32 %indvars.iv26, i32 8) - %248 = tail call i32 @llvm.smax.i32(i32 %247, i32 1) + %247 = tail call i32 @llvm.smax.i32(i32 %indvars.iv26, i32 1) + %248 = tail call i32 @llvm.umin.i32(i32 %247, i32 8) %smax30 = zext nneg i32 %248 to i64 %249 = trunc nuw nsw i64 %indvars.iv91.i to i32 %factor.op.mul.reass.us.i = mul i32 %factor.op.mul76.i, %249 @@ -7322,6 +7322,9 @@ declare i32 @llvm.smax.i32(i32, i32) #23 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.umin.i64(i64, i64) #23 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #23 + ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.umax.i64(i64, i64) #23 diff --git a/bench/opencv/optimized/finder_pattern_info.ll b/bench/opencv/optimized/finder_pattern_info.ll index 173a88bc9e3..f5cc39cc9c2 100644 --- a/bench/opencv/optimized/finder_pattern_info.ll +++ b/bench/opencv/optimized/finder_pattern_info.ll @@ -341,8 +341,8 @@ _ZN5zxing3RefINS_6qrcode13FinderPatternEED2Ev.exit21: ; preds = %_ZN5zxing3RefIN %114 = load ptr, ptr %21, align 8, !tbaa !14 %115 = tail call noundef i32 @_ZNK5zxing6qrcode13FinderPattern8getCountEv(ptr noundef nonnull align 8 dereferenceable(40) %114) %116 = add nsw i32 %113, %115 - %.sroa.speculated34 = tail call i32 @llvm.smin.i32(i32 %116, i32 10) - %.sroa.speculated = tail call i32 @llvm.smax.i32(i32 %.sroa.speculated34, i32 3) + %.sroa.speculated34 = tail call i32 @llvm.smax.i32(i32 %116, i32 3) + %.sroa.speculated = tail call i32 @llvm.umin.i32(i32 %.sroa.speculated34, i32 10) %117 = add nsw i32 %.sroa.speculated, -3 %118 = uitofp nneg i32 %117 to double %119 = fdiv double %118, 7.000000e+00 @@ -378,7 +378,7 @@ _ZN5zxing3RefINS_6qrcode13FinderPatternEED2Ev.exit21: ; preds = %_ZN5zxing3RefIN %136 = load ptr, ptr %22, align 8, !tbaa !8 %137 = getelementptr inbounds nuw i8, ptr %136, i64 8 %138 = load ptr, ptr %137, align 8 - tail call void %138(ptr noundef nonnull align 8 dereferenceable(12) %22) #10 + tail call void %140(ptr noundef nonnull align 8 dereferenceable(12) %22) #10 br label %_ZN5zxing3RefINS_6qrcode13FinderPatternEED2Ev.exit23 _ZN5zxing3RefINS_6qrcode13FinderPatternEED2Ev.exit23: ; preds = %135, %130, %128 @@ -397,7 +397,7 @@ _ZN5zxing3RefINS_6qrcode13FinderPatternEED2Ev.exit23: ; preds = %135, %130, %128 %145 = load ptr, ptr %15, align 8, !tbaa !8 %146 = getelementptr inbounds nuw i8, ptr %145, i64 8 %147 = load ptr, ptr %146, align 8 - tail call void %147(ptr noundef nonnull align 8 dereferenceable(12) %15) #10 + tail call void %149(ptr noundef nonnull align 8 dereferenceable(12) %15) #10 br label %_ZN5zxing3RefINS_6qrcode13FinderPatternEED2Ev.exit25 _ZN5zxing3RefINS_6qrcode13FinderPatternEED2Ev.exit25: ; preds = %144, %139, %_ZN5zxing3RefINS_6qrcode13FinderPatternEED2Ev.exit23 @@ -416,7 +416,7 @@ _ZN5zxing3RefINS_6qrcode13FinderPatternEED2Ev.exit25: ; preds = %144, %139, %_ZN %154 = load ptr, ptr %9, align 8, !tbaa !8 %155 = getelementptr inbounds nuw i8, ptr %154, i64 8 %156 = load ptr, ptr %155, align 8 - tail call void %156(ptr noundef nonnull align 8 dereferenceable(12) %9) #10 + tail call void %158(ptr noundef nonnull align 8 dereferenceable(12) %9) #10 br label %_ZN5zxing3RefINS_6qrcode13FinderPatternEED2Ev.exit27 _ZN5zxing3RefINS_6qrcode13FinderPatternEED2Ev.exit27: ; preds = %_ZN5zxing3RefINS_6qrcode13FinderPatternEED2Ev.exit25, %148, %153 @@ -766,10 +766,10 @@ declare float @acosf(float noundef) local_unnamed_addr #8 declare float @llvm.fabs.f32(float) #4 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smin.i32(i32, i32) #9 +declare i32 @llvm.smax.i32(i32, i32) #9 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smax.i32(i32, i32) #9 +declare i32 @llvm.umin.i32(i32, i32) #9 attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" } attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" } diff --git a/bench/openexr/optimized/ImfLut.ll b/bench/openexr/optimized/ImfLut.ll index d5a4521a7dd..fc792f22603 100644 --- a/bench/openexr/optimized/ImfLut.ll +++ b/bench/openexr/optimized/ImfLut.ll @@ -942,74 +942,74 @@ define i16 @_ZN7Imf_3_410round12logEN9Imath_3_24halfE(i16 %0) local_unnamed_addr %12 = fdiv double %11, 0x3FE62E42FEFA39EF %13 = fadd double %12, 2.000500e+03 %14 = fptosi double %13 to i32 - %spec.store.select = tail call i32 @llvm.smin.i32(i32 %14, i32 4095) - %spec.store.select1 = tail call i32 @llvm.smax.i32(i32 %spec.store.select, i32 1) + %spec.store.select = tail call i32 @llvm.smax.i32(i32 %14, i32 1) + %spec.store.select1 = tail call i32 @llvm.umin.i32(i32 %spec.store.select, i32 4095) %15 = add nsw i32 %spec.store.select1, -2000 %16 = sitofp i32 %15 to double %17 = fdiv double %16, 2.000000e+02 %exp2 = tail call double @exp2(double %17) %18 = fmul double %exp2, 0x3FC6A09E60000000 %19 = fptrunc double %18 to float - %20 = bitcast float %19 to i32 - %21 = tail call float @llvm.fabs.f32(float %19) %22 = bitcast float %21 to i32 - %23 = lshr i32 %20, 16 - %24 = trunc nuw i32 %23 to i16 - %25 = and i16 %24, -32768 - %26 = icmp samesign ugt i32 %22, 947912703 - br i1 %26, label %27, label %53 - -27: ; preds = %7 - %28 = icmp samesign ugt i32 %22, 2139095039 - br i1 %28, label %29, label %40, !prof !43 - -29: ; preds = %27 - %30 = or disjoint i16 %25, 31744 - %31 = icmp eq i32 %22, 2139095040 - br i1 %31, label %_ZN9Imath_3_24halfC2Ef.exit, label %32 - -32: ; preds = %29 - %33 = lshr i32 %22, 13 - %34 = and i32 %33, 1023 - %35 = icmp eq i32 %34, 0 - %36 = zext i1 %35 to i16 - %37 = trunc nuw nsw i32 %34 to i16 - %38 = or i16 %37, %36 - %39 = or disjoint i16 %38, %30 + %23 = tail call float @llvm.fabs.f32(float %21) + %24 = bitcast float %23 to i32 + %25 = lshr i32 %22, 16 + %26 = trunc nuw i32 %25 to i16 + %27 = and i16 %26, -32768 + %28 = icmp samesign ugt i32 %24, 947912703 + br i1 %28, label %29, label %55 + +29: ; preds = %7 + %30 = icmp samesign ugt i32 %24, 2139095039 + br i1 %30, label %31, label %42, !prof !43 + +31:; preds = %29 + %32 = or disjoint i16 %27, 31744 + %33 = icmp eq i32 %24, 2139095040 + br i1 %33, label %_ZN9Imath_3_24halfC2Ef.exit, label %34 + +34:; preds = %31 + %35 = lshr i32 %24, 13 + %36 = and i32 %35, 1023 + %37 = icmp eq i32 %36, 0 + %38 = zext i1 %37 to i16 + %39 = trunc nuw nsw i32 %36 to i16 + %40 = or i16 %39, %38 + %41 = or disjoint i16 %40, %32 br label %_ZN9Imath_3_24halfC2Ef.exit -40: ; preds = %27 - %41 = icmp samesign ugt i32 %22, 1199566847 +40: ; preds = %29 + %41 = icmp samesign ugt i32 %24, 1199566847 br i1 %41, label %42, label %44, !prof !43 42: ; preds = %40 - %43 = or disjoint i16 %25, 31744 + %43 = or disjoint i16 %27, 31744 br label %_ZN9Imath_3_24halfC2Ef.exit 44: ; preds = %40 - %45 = add nuw nsw i32 %22, 134221823 - %46 = lshr i32 %22, 13 + %45 = add nuw nsw i32 %24, 134221823 + %46 = lshr i32 %24, 13 %47 = and i32 %46, 1 %48 = add nuw nsw i32 %45, %47 %49 = lshr i32 %48, 13 - %50 = and i32 %23, 32768 + %50 = and i32 %25, 32768 %51 = or i32 %49, %50 %52 = trunc i32 %51 to i16 br label %_ZN9Imath_3_24halfC2Ef.exit 53: ; preds = %7 - %54 = icmp samesign ult i32 %22, 855638017 + %54 = icmp samesign ult i32 %24, 855638017 br i1 %54, label %_ZN9Imath_3_24halfC2Ef.exit, label %55 55: ; preds = %53 - %56 = lshr i32 %22, 23 + %56 = lshr i32 %24, 23 %57 = sub nuw nsw i32 126, %56 - %58 = and i32 %22, 8388607 + %58 = and i32 %24, 8388607 %59 = or disjoint i32 %58, 8388608 %60 = add nsw i32 %56, -94 %61 = shl i32 %59, %60 %62 = lshr i32 %59, %57 - %63 = and i32 %23, 32768 + %63 = and i32 %25, 32768 %64 = or i32 %62, %63 %65 = trunc nuw i32 %64 to i16 %66 = icmp ugt i32 %61, -2147483648 @@ -1026,8 +1026,8 @@ define i16 @_ZN7Imf_3_410round12logEN9Imath_3_24halfE(i16 %0) local_unnamed_addr %71 = add nuw i16 %65, 1 br label %_ZN9Imath_3_24halfC2Ef.exit -_ZN9Imath_3_24halfC2Ef.exit: ; preds = %70, %67, %53, %44, %42, %32, %29, %1 - %.sroa.06.0 = phi i16 [ 0, %1 ], [ %39, %32 ], [ %43, %42 ], [ %52, %44 ], [ %30, %29 ], [ %25, %53 ], [ %71, %70 ], [ %65, %67 ] +_ZN9Imath_3_24halfC2Ef.exit: ; preds = %70, %67, %53, %44, %42, %34, %31, %1 + %.sroa.06.0 = phi i16 [ 0, %1 ], [ %41, %34 ], [ %43, %44 ], [ %52, %46 ], [ %32, %31 ], [ %27, %55 ], [ %71, %72 ], [ %65, %69 ] ret i16 %.sroa.06.0 } @@ -1043,9 +1043,6 @@ define internal void @_GLOBAL__sub_I_ImfLut.cpp() #7 section ".text.startup" { ret void } -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smin.i32(i32, i32) #8 - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #8 @@ -1054,6 +1051,9 @@ declare double @exp2(double) local_unnamed_addr ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare float @llvm.fabs.f32(float) #8 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #8 + attributes #0 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { nofree nounwind } diff --git a/bench/openexr/optimized/ImfRgbaFile.ll b/bench/openexr/optimized/ImfRgbaFile.ll index 5f678c936d9..caff8eff9ab 100644 --- a/bench/openexr/optimized/ImfRgbaFile.ll +++ b/bench/openexr/optimized/ImfRgbaFile.ll @@ -4077,7 +4077,7 @@ _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc.exit55: ; preds = %17 20: ; preds = %_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc.exit55 invoke void @__cxa_throw(ptr nonnull %19, ptr nonnull @_ZTIN7Iex_3_46ArgExcE, ptr nonnull @_ZN7Iex_3_46ArgExcD1Ev) #34 - to label %173 unwind label %21 + to label %175 unwind label %21 21: ; preds = %17, %9, %20, %15, %_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc.exit %22 = landingpad { ptr, i32 } @@ -4199,16 +4199,16 @@ _ZN7Imf_3_413RgbaInputFile7FromYca10rotateBuf2Ei.exit: ; preds = %63 br label %80 .lr.ph95: ; preds = %80 - %.sroa.speculated70 = tail call i32 @llvm.smin.i32(i32 %73, i32 3) %77 = getelementptr inbounds nuw i8, ptr %0, i64 80 %78 = getelementptr inbounds nuw i8, ptr %0, i64 64 %79 = getelementptr inbounds nuw i8, ptr %0, i64 336 - %smax = tail call i32 @llvm.smax.i32(i32 %.sroa.speculated70, i32 1) - %wide.trip.count = zext nneg i32 %smax to i64 - br label %86 + %smax = tail call i32 @llvm.smax.i32(i32 %73, i32 1) + %81 = tail call i32 @llvm.umin.i32(i32 %smax, i32 3) + %wide.trip.count = zext nneg i32 %81 to i64 + br label %88 80: ; preds = %72, %80 - %indvars.iv105 = phi i64 [ %umin104, %72 ], [ %indvars.iv.next106, %80 ] + %indvars.iv105 = phi i64 [ %umin104, %72 ], [ %indvars.iv.next106, %82 ] %indvars.iv.next106 = add nsw i64 %indvars.iv105, -1 %81 = getelementptr inbounds nuw [29 x ptr], ptr %75, i64 0, i64 %indvars.iv.next106 %82 = load ptr, ptr %81, align 8, !tbaa !41 @@ -4218,35 +4218,35 @@ _ZN7Imf_3_413RgbaInputFile7FromYca10rotateBuf2Ei.exit: ; preds = %63 %85 = icmp samesign ugt i64 %indvars.iv105, 1 br i1 %85, label %80, label %.lr.ph95, !llvm.loop !145 -86: ; preds = %.lr.ph95, %103 - %indvars.iv107 = phi i64 [ 0, %.lr.ph95 ], [ %indvars.iv.next108, %103 ] +86: ; preds = %.lr.ph95, %105 + %indvars.iv107 = phi i64 [ 0, %.lr.ph95 ], [ %indvars.iv.next108, %105 ] %87 = trunc i64 %indvars.iv107 to i32 %88 = add i32 %1, %87 %89 = and i32 %88, 1 %.not53 = icmp eq i32 %89, 0 %90 = load i32, ptr %78, align 8, !tbaa !117 - br i1 %.not53, label %97, label %91 + br i1 %.not53, label %99, label %91 91: ; preds = %86 %92 = add nuw nsw i64 %indvars.iv107, 13 %93 = getelementptr inbounds nuw [29 x ptr], ptr %75, i64 0, i64 %92 - %94 = load ptr, ptr %93, align 8, !tbaa !41 - %95 = getelementptr inbounds nuw [3 x ptr], ptr %79, i64 0, i64 %indvars.iv107 %96 = load ptr, ptr %95, align 8, !tbaa !41 - tail call void @_ZN7Imf_3_47RgbaYca9YCAtoRGBAERKN9Imath_3_24Vec3IfEEiPKNS_4RgbaEPS6_(ptr noundef nonnull align 4 dereferenceable(12) %77, i32 noundef %90, ptr noundef %94, ptr noundef %96) - br label %103 - -97: ; preds = %86 - %98 = getelementptr inbounds nuw ptr, ptr %75, i64 %indvars.iv107 - %99 = getelementptr inbounds nuw [3 x ptr], ptr %79, i64 0, i64 %indvars.iv107 - %100 = load ptr, ptr %99, align 8, !tbaa !41 - tail call void @_ZN7Imf_3_47RgbaYca21reconstructChromaVertEiPKPKNS_4RgbaEPS1_(i32 noundef %90, ptr noundef nonnull %98, ptr noundef %100) - %101 = load i32, ptr %78, align 8, !tbaa !117 - %102 = load ptr, ptr %99, align 8, !tbaa !41 - tail call void @_ZN7Imf_3_47RgbaYca9YCAtoRGBAERKN9Imath_3_24Vec3IfEEiPKNS_4RgbaEPS6_(ptr noundef nonnull align 4 dereferenceable(12) %77, i32 noundef %101, ptr noundef %102, ptr noundef %102) - br label %103 - -103: ; preds = %91, %97 + %97 = getelementptr inbounds nuw [3 x ptr], ptr %79, i64 0, i64 %indvars.iv107 + %98 = load ptr, ptr %97, align 8, !tbaa !41 + tail call void @_ZN7Imf_3_47RgbaYca9YCAtoRGBAERKN9Imath_3_24Vec3IfEEiPKNS_4RgbaEPS6_(ptr noundef nonnull align 4 dereferenceable(12) %77, i32 noundef %92, ptr noundef %96, ptr noundef %98) + br label %105 + +99: ; preds = %86 + %100 = getelementptr inbounds nuw ptr, ptr %75, i64 %indvars.iv107 + %101 = getelementptr inbounds nuw [3 x ptr], ptr %79, i64 0, i64 %indvars.iv107 + %101 = load ptr, ptr %101, align 8, !tbaa !41 + tail call void @_ZN7Imf_3_47RgbaYca21reconstructChromaVertEiPKPKNS_4RgbaEPS1_(i32 noundef %92, ptr noundef nonnull %100, ptr noundef %102) + %103 = load i32, ptr %78, align 8, !tbaa !117 + %104 = load ptr, ptr %101, align 8, !tbaa !41 + tail call void @_ZN7Imf_3_47RgbaYca9YCAtoRGBAERKN9Imath_3_24Vec3IfEEiPKNS_4RgbaEPS6_(ptr noundef nonnull align 4 dereferenceable(12) %77, i32 noundef %103, ptr noundef %104, ptr noundef %104) + br label %105 + +105: ; preds = %93, %99 %indvars.iv.next108 = add nuw nsw i64 %indvars.iv107, 1 %exitcond.not = icmp eq i64 %indvars.iv.next108, %wide.trip.count br i1 %exitcond.not, label %.loopexit, label %86, !llvm.loop !146 @@ -4284,40 +4284,40 @@ select.unfold: ; preds = %.lr.ph, %select.unf %120 = icmp samesign ugt i64 %indvars.iv, 1 br i1 %120, label %select.unfold, label %.lr.ph92, !llvm.loop !147 -121: ; preds = %.lr.ph92, %138 - %indvars.iv101 = phi i64 [ 2, %.lr.ph92 ], [ %indvars.iv.next102, %138 ] +121: ; preds = %.lr.ph92, %140 + %indvars.iv101 = phi i64 [ 2, %.lr.ph92 ], [ %indvars.iv.next102, %140 ] %122 = trunc i64 %indvars.iv101 to i32 %123 = add i32 %1, %122 %124 = and i32 %123, 1 %.not = icmp eq i32 %124, 0 %125 = load i32, ptr %110, align 8, !tbaa !117 - br i1 %.not, label %132, label %126 + br i1 %.not, label %134, label %126 126: ; preds = %121 %127 = add nsw i64 %indvars.iv101, 13 %128 = getelementptr inbounds [29 x ptr], ptr %111, i64 0, i64 %127 - %129 = load ptr, ptr %128, align 8, !tbaa !41 - %130 = getelementptr inbounds [3 x ptr], ptr %112, i64 0, i64 %indvars.iv101 %131 = load ptr, ptr %130, align 8, !tbaa !41 - tail call void @_ZN7Imf_3_47RgbaYca9YCAtoRGBAERKN9Imath_3_24Vec3IfEEiPKNS_4RgbaEPS6_(ptr noundef nonnull align 4 dereferenceable(12) %109, i32 noundef %125, ptr noundef %129, ptr noundef %131) - br label %138 - -132: ; preds = %121 - %133 = getelementptr inbounds ptr, ptr %111, i64 %indvars.iv101 - %134 = getelementptr inbounds [3 x ptr], ptr %112, i64 0, i64 %indvars.iv101 - %135 = load ptr, ptr %134, align 8, !tbaa !41 - tail call void @_ZN7Imf_3_47RgbaYca21reconstructChromaVertEiPKPKNS_4RgbaEPS1_(i32 noundef %125, ptr noundef nonnull %133, ptr noundef %135) - %136 = load i32, ptr %110, align 8, !tbaa !117 - %137 = load ptr, ptr %134, align 8, !tbaa !41 - tail call void @_ZN7Imf_3_47RgbaYca9YCAtoRGBAERKN9Imath_3_24Vec3IfEEiPKNS_4RgbaEPS6_(ptr noundef nonnull align 4 dereferenceable(12) %109, i32 noundef %136, ptr noundef %137, ptr noundef %137) - br label %138 - -138: ; preds = %126, %132 + %132 = getelementptr inbounds [3 x ptr], ptr %112, i64 0, i64 %indvars.iv101 + %133 = load ptr, ptr %132, align 8, !tbaa !41 + tail call void @_ZN7Imf_3_47RgbaYca9YCAtoRGBAERKN9Imath_3_24Vec3IfEEiPKNS_4RgbaEPS6_(ptr noundef nonnull align 4 dereferenceable(12) %111, i32 noundef %127, ptr noundef %131, ptr noundef %133) + br label %140 + +134: ; preds = %121 + %135 = getelementptr inbounds ptr, ptr %113, i64 %indvars.iv101 + %136 = getelementptr inbounds [3 x ptr], ptr %114, i64 0, i64 %indvars.iv101 + %136 = load ptr, ptr %136, align 8, !tbaa !41 + tail call void @_ZN7Imf_3_47RgbaYca21reconstructChromaVertEiPKPKNS_4RgbaEPS1_(i32 noundef %127, ptr noundef nonnull %135, ptr noundef %137) + %138 = load i32, ptr %112, align 8, !tbaa !117 + %139 = load ptr, ptr %136, align 8, !tbaa !41 + tail call void @_ZN7Imf_3_47RgbaYca9YCAtoRGBAERKN9Imath_3_24Vec3IfEEiPKNS_4RgbaEPS6_(ptr noundef nonnull align 4 dereferenceable(12) %111, i32 noundef %138, ptr noundef %139, ptr noundef %139) + br label %140 + +140: ; preds = %128, %134 %indvars.iv.next102 = add nsw i64 %indvars.iv101, -1 %139 = icmp sgt i64 %indvars.iv.next102, %113 br i1 %139, label %121, label %.loopexit, !llvm.loop !148 -.loopexit: ; preds = %138, %103, %104 +.loopexit: ; preds = %140, %105, %104 %140 = getelementptr inbounds nuw i8, ptr %0, i64 80 %141 = getelementptr inbounds nuw i8, ptr %0, i64 64 %142 = load i32, ptr %141, align 8, !tbaa !117 @@ -4343,7 +4343,7 @@ select.unfold: ; preds = %.lr.ph, %select.unf ret void 154: ; preds = %.lr.ph97, %154 - %indvars.iv110 = phi i64 [ 0, %.lr.ph97 ], [ %indvars.iv.next111, %154 ] + %indvars.iv110 = phi i64 [ 0, %.lr.ph97 ], [ %indvars.iv.next111, %156 ] %155 = load i64, ptr %150, align 8, !tbaa !140 %156 = mul i64 %155, %151 %157 = load i64, ptr %152, align 8, !tbaa !139 diff --git a/bench/openjdk/optimized/Net.ll b/bench/openjdk/optimized/Net.ll index 0e1226f91d3..5218645ad5f 100644 --- a/bench/openjdk/optimized/Net.ll +++ b/bench/openjdk/optimized/Net.ll @@ -1592,8 +1592,8 @@ define range(i32 -32768, 32768) i32 @Java_sun_nio_ch_Net_poll(ptr noundef %0, pt %8 = trunc i32 %3 to i16 %9 = getelementptr inbounds nuw i8, ptr %6, i64 4 store i16 %8, ptr %9, align 4 - %spec.store.select = tail call i64 @llvm.smin.i64(i64 %4, i64 2147483647) - %.0811 = tail call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -1) + %spec.store.select = tail call i64 @llvm.smax.i64(i64 %4, i64 -1) + %.0811 = tail call i64 @llvm.smin.i64(i64 %spec.store.select, i64 2147483647) %.08 = trunc nsw i64 %.0811 to i32 %10 = call i32 @poll(ptr noundef nonnull %6, i64 noundef 1, i32 noundef %.08) #8 %11 = icmp sgt i32 %10, -1 @@ -1608,7 +1608,7 @@ define range(i32 -32768, 32768) i32 @Java_sun_nio_ch_Net_poll(ptr noundef %0, pt 16: ; preds = %5 %17 = tail call ptr @__errno_location() #7 %18 = load i32, ptr %17, align 4 - switch i32 %18, label %22 [ + switch i32 %18, label %23 [ i32 4, label %handleSocketError.exit i32 115, label %handleSocketError.exit.fold.split i32 71, label %23 @@ -1634,7 +1634,7 @@ define range(i32 -32768, 32768) i32 @Java_sun_nio_ch_Net_poll(ptr noundef %0, pt br label %23 23: ; preds = %16, %22, %21, %20, %19 - %.0.i.i = phi ptr [ @.str.3, %22 ], [ @.str.17, %21 ], [ @.str.16, %20 ], [ @.str.15, %19 ], [ @.str.14, %16 ] + %.0.i.i = phi ptr [ @.str.3, %23 ], [ @.str.17, %22 ], [ @.str.16, %21 ], [ @.str.15, %20 ], [ @.str.14, %16 ] call void @JNU_ThrowByNameWithLastError(ptr noundef %0, ptr noundef nonnull %.0.i.i, ptr noundef nonnull @.str.18) #8 br label %handleSocketError.exit @@ -1642,7 +1642,7 @@ handleSocketError.exit.fold.split: ; preds = %16 br label %handleSocketError.exit handleSocketError.exit: ; preds = %16, %handleSocketError.exit.fold.split, %23, %12 - %.0 = phi i32 [ %15, %12 ], [ 0, %16 ], [ -5, %23 ], [ -5, %handleSocketError.exit.fold.split ] + %.0 = phi i32 [ %15, %12 ], [ 0, %16 ], [ -5, %24 ], [ -5, %handleSocketError.exit.fold.split ] ret i32 %.0 } @@ -1659,8 +1659,8 @@ define zeroext range(i8 0, 2) i8 @Java_sun_nio_ch_Net_pollConnect(ptr noundef %0 store i16 4, ptr %9, align 4 %10 = getelementptr inbounds nuw i8, ptr %5, i64 6 store i16 0, ptr %10, align 2 - %spec.store.select = tail call i64 @llvm.smin.i64(i64 %3, i64 2147483647) - %.01424 = tail call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -1) + %spec.store.select = tail call i64 @llvm.smax.i64(i64 %3, i64 -1) + %.01424 = tail call i64 @llvm.smin.i64(i64 %spec.store.select, i64 2147483647) %.014 = trunc nsw i64 %.01424 to i32 %11 = call i32 @poll(ptr noundef nonnull %5, i64 noundef 1, i32 noundef %.014) #8 %12 = icmp sgt i32 %11, 0 @@ -1677,7 +1677,7 @@ define zeroext range(i8 0, 2) i8 @Java_sun_nio_ch_Net_pollConnect(ptr noundef %0 17: ; preds = %13 %18 = load i32, ptr %14, align 4 - switch i32 %18, label %22 [ + switch i32 %18, label %23 [ i32 115, label %handleSocketError.exit i32 71, label %23 i32 111, label %19 @@ -1701,14 +1701,14 @@ define zeroext range(i8 0, 2) i8 @Java_sun_nio_ch_Net_pollConnect(ptr noundef %0 22: ; preds = %17 br label %23 -23: ; preds = %22, %21, %20, %19, %17 - %.0.i.i = phi ptr [ @.str.3, %22 ], [ @.str.17, %21 ], [ @.str.16, %20 ], [ @.str.15, %19 ], [ @.str.14, %17 ] +23: ; preds = %23, %22, %21, %20, %17 + %.0.i.i = phi ptr [ @.str.3, %23 ], [ @.str.17, %22 ], [ @.str.16, %21 ], [ @.str.15, %20 ], [ @.str.14, %17 ] call void @JNU_ThrowByNameWithLastError(ptr noundef %0, ptr noundef nonnull %.0.i.i, ptr noundef nonnull @.str.18) #8 br label %handleSocketError.exit 24: ; preds = %13 %25 = load i32, ptr %6, align 4 - switch i32 %25, label %29 [ + switch i32 %25, label %30 [ i32 0, label %31 i32 115, label %handleSocketError.exit i32 71, label %30 @@ -1734,7 +1734,7 @@ define zeroext range(i8 0, 2) i8 @Java_sun_nio_ch_Net_pollConnect(ptr noundef %0 br label %30 30: ; preds = %24, %29, %28, %27, %26 - %.0.i.i19 = phi ptr [ @.str.3, %29 ], [ @.str.17, %28 ], [ @.str.16, %27 ], [ @.str.15, %26 ], [ @.str.14, %24 ] + %.0.i.i19 = phi ptr [ @.str.3, %30 ], [ @.str.17, %29 ], [ @.str.16, %28 ], [ @.str.15, %27 ], [ @.str.14, %25 ] store i32 %25, ptr %14, align 4 call void @JNU_ThrowByNameWithLastError(ptr noundef %0, ptr noundef nonnull %.0.i.i19, ptr noundef nonnull @.str.18) #8 br label %handleSocketError.exit @@ -1765,7 +1765,7 @@ define zeroext range(i8 0, 2) i8 @Java_sun_nio_ch_Net_pollConnect(ptr noundef %0 br label %handleSocketError.exit handleSocketError.exit: ; preds = %24, %30, %23, %17, %35, %37, %31, %41, %34 - %.0 = phi i8 [ 0, %34 ], [ 0, %41 ], [ 1, %31 ], [ 0, %37 ], [ 0, %35 ], [ 0, %17 ], [ 0, %23 ], [ 0, %30 ], [ 0, %24 ] + %.0 = phi i8 [ 0, %35 ], [ 0, %42 ], [ 1, %32 ], [ 0, %38 ], [ 0, %36 ], [ 0, %17 ], [ 0, %24 ], [ 0, %31 ], [ 0, %25 ] ret i8 %.0 } diff --git a/bench/openjdk/optimized/cmsopt.ll b/bench/openjdk/optimized/cmsopt.ll index 78f445a3eba..d3c38c9ad65 100644 --- a/bench/openjdk/optimized/cmsopt.ll +++ b/bench/openjdk/optimized/cmsopt.ll @@ -3543,12 +3543,12 @@ define internal void @MatShaperEval16(ptr noundef readonly captures(none) %0, pt %69 = add i32 %68, %64 %70 = add i32 %69, %66 %71 = ashr i32 %70, 14 - %72 = tail call i32 @llvm.smin.i32(i32 %39, i32 16384) - %73 = tail call i32 @llvm.smax.i32(i32 %72, i32 0) - %74 = tail call i32 @llvm.smin.i32(i32 %55, i32 16384) - %75 = tail call i32 @llvm.smax.i32(i32 %74, i32 0) - %76 = tail call i32 @llvm.smin.i32(i32 %71, i32 16384) - %77 = tail call i32 @llvm.smax.i32(i32 %76, i32 0) + %72 = tail call i32 @llvm.smax.i32(i32 %39, i32 0) + %73 = tail call i32 @llvm.umin.i32(i32 %72, i32 16384) + %74 = tail call i32 @llvm.smax.i32(i32 %55, i32 0) + %75 = tail call i32 @llvm.umin.i32(i32 %74, i32 16384) + %76 = tail call i32 @llvm.smax.i32(i32 %71, i32 0) + %77 = tail call i32 @llvm.umin.i32(i32 %76, i32 16384) %78 = getelementptr inbounds nuw i8, ptr %2, i64 3128 %79 = zext nneg i32 %73 to i64 %80 = getelementptr inbounds nuw [16385 x i16], ptr %78, i64 0, i64 %79 @@ -4112,9 +4112,6 @@ define internal ptr @Prelin8dup(ptr noundef %0, ptr noundef %1) #0 { declare i32 @cmsIsToneCurveDescending(ptr noundef) local_unnamed_addr #1 -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smin.i32(i32, i32) #8 - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #8 @@ -4124,6 +4121,9 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr captures(none)) #9 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none)) #9 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #8 + attributes #0 = { nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } diff --git a/bench/openjdk/optimized/compilerDefinitions.ll b/bench/openjdk/optimized/compilerDefinitions.ll index 533d342dbe6..7064136fdf1 100644 --- a/bench/openjdk/optimized/compilerDefinitions.ll +++ b/bench/openjdk/optimized/compilerDefinitions.ll @@ -476,8 +476,8 @@ define hidden noundef range(i64 0, 2147483648) i64 @_ZN14CompilerConfig32jvmflag _ZN14CompilerConfig24scaled_compile_thresholdEl.exit: ; preds = %1, %6, %11 %.0.i.i = phi i64 [ %0, %1 ], [ 9223372036854775807, %6 ], [ %spec.select.i.i, %11 ] call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %2) - %16 = tail call noundef i64 @llvm.smin.i64(i64 %.0.i.i, i64 2147483647) - %17 = tail call noundef i64 @llvm.smax.i64(i64 %16, i64 0) + %16 = tail call i64 @llvm.smax.i64(i64 %.0.i.i, i64 0) + %17 = tail call i64 @llvm.umin.i64(i64 %16, i64 2147483647) ret i64 %17 } @@ -532,8 +532,8 @@ _ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i: ; preds = %15 _ZN14CompilerConfig15scaled_freq_logEl.exit: ; preds = %1, %6, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i, %21 %.0.i.i = phi i64 [ %26, %21 ], [ %0, %1 ], [ 0, %6 ], [ 0, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i ] - %27 = tail call noundef i64 @llvm.smin.i64(i64 %.0.i.i, i64 30) - %28 = tail call noundef i64 @llvm.smax.i64(i64 %27, i64 0) + %27 = tail call i64 @llvm.smax.i64(i64 %.0.i.i, i64 0) + %28 = tail call i64 @llvm.umin.i64(i64 %27, i64 30) ret i64 %28 } @@ -1249,8 +1249,8 @@ _ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i: ; preds = %129 _ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit: ; preds = %118, %121, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i, %135 %.0.i.i.i = phi i64 [ %140, %135 ], [ %119, %118 ], [ 0, %121 ], [ 0, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i ] - %141 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i, i64 30) - %142 = call noundef range(i64 0, 31) i64 @llvm.smax.i64(i64 %141, i64 0) + %141 = call i64 @llvm.smax.i64(i64 %.0.i.i.i, i64 0) + %142 = call noundef range(i64 0, 31) i64 @llvm.umin.i64(i64 %141, i64 30) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %29) store i64 %142, ptr %29, align 8 %143 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 392, i32 noundef 3, ptr noundef nonnull %29, i32 noundef 5) #11 @@ -1304,8 +1304,8 @@ _ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i10: ; preds = %157 _ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit14: ; preds = %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit, %148, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i10, %163 %.0.i.i.i12 = phi i64 [ %168, %163 ], [ %144, %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit ], [ 0, %148 ], [ 0, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i10 ] - %169 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i12, i64 30) - %170 = call noundef range(i64 0, 31) i64 @llvm.smax.i64(i64 %169, i64 0) + %169 = call i64 @llvm.smax.i64(i64 %.0.i.i.i12, i64 0) + %170 = call noundef range(i64 0, 31) i64 @llvm.umin.i64(i64 %169, i64 30) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %27) store i64 %170, ptr %27, align 8 %171 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 396, i32 noundef 3, ptr noundef nonnull %27, i32 noundef 5) #11 @@ -1336,8 +1336,8 @@ _ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit14: ; preds = %_ZN14CompilerC _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit: ; preds = %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit14, %176, %181 %.0.i.i.i16 = phi i64 [ %172, %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit14 ], [ 9223372036854775807, %176 ], [ %spec.select.i.i.i, %181 ] call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %26) - %186 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i16, i64 2147483647) - %187 = call noundef range(i64 0, 2147483648) i64 @llvm.smax.i64(i64 %186, i64 0) + %186 = call i64 @llvm.smax.i64(i64 %.0.i.i.i16, i64 0) + %187 = call noundef range(i64 0, 2147483648) i64 @llvm.umin.i64(i64 %186, i64 2147483647) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %25) store i64 %187, ptr %25, align 8 %188 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 401, i32 noundef 3, ptr noundef nonnull %25, i32 noundef 5) #11 @@ -1368,8 +1368,8 @@ _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit: ; preds = %_ZN14Co _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit20: ; preds = %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit, %193, %198 %.0.i.i.i19 = phi i64 [ %189, %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit ], [ 9223372036854775807, %193 ], [ %spec.select.i.i.i18, %198 ] call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %24) - %203 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i19, i64 2147483647) - %204 = call noundef range(i64 0, 2147483648) i64 @llvm.smax.i64(i64 %203, i64 0) + %203 = call i64 @llvm.smax.i64(i64 %.0.i.i.i19, i64 0) + %204 = call noundef range(i64 0, 2147483648) i64 @llvm.umin.i64(i64 %203, i64 2147483647) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %23) store i64 %204, ptr %23, align 8 %205 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 402, i32 noundef 3, ptr noundef nonnull %23, i32 noundef 5) #11 @@ -1400,8 +1400,8 @@ _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit20: ; preds = %_ZN14 _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit24: ; preds = %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit20, %210, %215 %.0.i.i.i23 = phi i64 [ %206, %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit20 ], [ 9223372036854775807, %210 ], [ %spec.select.i.i.i22, %215 ] call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %22) - %220 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i23, i64 2147483647) - %221 = call noundef range(i64 0, 2147483648) i64 @llvm.smax.i64(i64 %220, i64 0) + %220 = call i64 @llvm.smax.i64(i64 %.0.i.i.i23, i64 0) + %221 = call noundef range(i64 0, 2147483648) i64 @llvm.umin.i64(i64 %220, i64 2147483647) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %21) store i64 %221, ptr %21, align 8 %222 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 403, i32 noundef 3, ptr noundef nonnull %21, i32 noundef 5) #11 @@ -1432,8 +1432,8 @@ _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit24: ; preds = %_ZN14 _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit28: ; preds = %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit24, %227, %232 %.0.i.i.i27 = phi i64 [ %223, %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit24 ], [ 9223372036854775807, %227 ], [ %spec.select.i.i.i26, %232 ] call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %20) - %237 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i27, i64 2147483647) - %238 = call noundef range(i64 0, 2147483648) i64 @llvm.smax.i64(i64 %237, i64 0) + %237 = call i64 @llvm.smax.i64(i64 %.0.i.i.i27, i64 0) + %238 = call noundef range(i64 0, 2147483648) i64 @llvm.umin.i64(i64 %237, i64 2147483647) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %19) store i64 %238, ptr %19, align 8 %239 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 404, i32 noundef 3, ptr noundef nonnull %19, i32 noundef 5) #11 @@ -1487,8 +1487,8 @@ _ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i31: ; preds = %253 _ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit35: ; preds = %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit28, %244, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i31, %259 %.0.i.i.i33 = phi i64 [ %264, %259 ], [ %240, %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit28 ], [ 0, %244 ], [ 0, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i31 ] - %265 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i33, i64 30) - %266 = call noundef range(i64 0, 31) i64 @llvm.smax.i64(i64 %265, i64 0) + %265 = call i64 @llvm.smax.i64(i64 %.0.i.i.i33, i64 0) + %266 = call noundef range(i64 0, 31) i64 @llvm.umin.i64(i64 %265, i64 30) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %17) store i64 %266, ptr %17, align 8 %267 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 393, i32 noundef 3, ptr noundef nonnull %17, i32 noundef 5) #11 @@ -1542,8 +1542,8 @@ _ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i38: ; preds = %281 _ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit42: ; preds = %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit35, %272, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i38, %287 %.0.i.i.i40 = phi i64 [ %292, %287 ], [ %268, %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit35 ], [ 0, %272 ], [ 0, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i38 ] - %293 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i40, i64 30) - %294 = call noundef range(i64 0, 31) i64 @llvm.smax.i64(i64 %293, i64 0) + %293 = call i64 @llvm.smax.i64(i64 %.0.i.i.i40, i64 0) + %294 = call noundef range(i64 0, 31) i64 @llvm.umin.i64(i64 %293, i64 30) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %15) store i64 %294, ptr %15, align 8 %295 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 397, i32 noundef 3, ptr noundef nonnull %15, i32 noundef 5) #11 @@ -1597,8 +1597,8 @@ _ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i45: ; preds = %309 _ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit49: ; preds = %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit42, %300, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i45, %315 %.0.i.i.i47 = phi i64 [ %320, %315 ], [ %296, %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit42 ], [ 0, %300 ], [ 0, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i45 ] - %321 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i47, i64 30) - %322 = call noundef range(i64 0, 31) i64 @llvm.smax.i64(i64 %321, i64 0) + %321 = call i64 @llvm.smax.i64(i64 %.0.i.i.i47, i64 0) + %322 = call noundef range(i64 0, 31) i64 @llvm.umin.i64(i64 %321, i64 30) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %13) store i64 %322, ptr %13, align 8 %323 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 394, i32 noundef 3, ptr noundef nonnull %13, i32 noundef 5) #11 @@ -1652,8 +1652,8 @@ _ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i52: ; preds = %337 _ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit56: ; preds = %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit49, %328, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i52, %343 %.0.i.i.i54 = phi i64 [ %348, %343 ], [ %324, %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit49 ], [ 0, %328 ], [ 0, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i52 ] - %349 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i54, i64 30) - %350 = call noundef range(i64 0, 31) i64 @llvm.smax.i64(i64 %349, i64 0) + %349 = call i64 @llvm.smax.i64(i64 %.0.i.i.i54, i64 0) + %350 = call noundef range(i64 0, 31) i64 @llvm.umin.i64(i64 %349, i64 30) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %11) store i64 %350, ptr %11, align 8 %351 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 398, i32 noundef 3, ptr noundef nonnull %11, i32 noundef 5) #11 @@ -1707,8 +1707,8 @@ _ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i59: ; preds = %365 _ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit63: ; preds = %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit56, %356, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i59, %371 %.0.i.i.i61 = phi i64 [ %376, %371 ], [ %352, %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit56 ], [ 0, %356 ], [ 0, %_ZN14CompilerConfig24scaled_compile_thresholdEld.exit.i.i.i59 ] - %377 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i61, i64 30) - %378 = call noundef range(i64 0, 31) i64 @llvm.smax.i64(i64 %377, i64 0) + %377 = call i64 @llvm.smax.i64(i64 %.0.i.i.i61, i64 0) + %378 = call noundef range(i64 0, 31) i64 @llvm.umin.i64(i64 %377, i64 30) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %9) store i64 %378, ptr %9, align 8 %379 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 395, i32 noundef 3, ptr noundef nonnull %9, i32 noundef 5) #11 @@ -1739,8 +1739,8 @@ _ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit63: ; preds = %_ZN14CompilerC _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit67: ; preds = %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit63, %384, %389 %.0.i.i.i66 = phi i64 [ %380, %_ZN14CompilerConfig23jvmflag_scaled_freq_logEl.exit63 ], [ 9223372036854775807, %384 ], [ %spec.select.i.i.i65, %389 ] call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %8) - %394 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i66, i64 2147483647) - %395 = call noundef range(i64 0, 2147483648) i64 @llvm.smax.i64(i64 %394, i64 0) + %394 = call i64 @llvm.smax.i64(i64 %.0.i.i.i66, i64 0) + %395 = call noundef range(i64 0, 2147483648) i64 @llvm.umin.i64(i64 %394, i64 2147483647) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %7) store i64 %395, ptr %7, align 8 %396 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 405, i32 noundef 3, ptr noundef nonnull %7, i32 noundef 5) #11 @@ -1771,8 +1771,8 @@ _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit67: ; preds = %_ZN14 _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit71: ; preds = %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit67, %401, %406 %.0.i.i.i70 = phi i64 [ %397, %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit67 ], [ 9223372036854775807, %401 ], [ %spec.select.i.i.i69, %406 ] call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %6) - %411 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i70, i64 2147483647) - %412 = call noundef range(i64 0, 2147483648) i64 @llvm.smax.i64(i64 %411, i64 0) + %411 = call i64 @llvm.smax.i64(i64 %.0.i.i.i70, i64 0) + %412 = call noundef range(i64 0, 2147483648) i64 @llvm.umin.i64(i64 %411, i64 2147483647) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %5) store i64 %412, ptr %5, align 8 %413 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 406, i32 noundef 3, ptr noundef nonnull %5, i32 noundef 5) #11 @@ -1803,8 +1803,8 @@ _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit71: ; preds = %_ZN14 _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit75: ; preds = %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit71, %418, %423 %.0.i.i.i74 = phi i64 [ %414, %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit71 ], [ 9223372036854775807, %418 ], [ %spec.select.i.i.i73, %423 ] call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %4) - %428 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i74, i64 2147483647) - %429 = call noundef range(i64 0, 2147483648) i64 @llvm.smax.i64(i64 %428, i64 0) + %428 = call i64 @llvm.smax.i64(i64 %.0.i.i.i74, i64 0) + %429 = call noundef range(i64 0, 2147483648) i64 @llvm.umin.i64(i64 %428, i64 2147483647) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3) store i64 %429, ptr %3, align 8 %430 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 407, i32 noundef 3, ptr noundef nonnull %3, i32 noundef 5) #11 @@ -1835,8 +1835,8 @@ _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit75: ; preds = %_ZN14 _ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit79: ; preds = %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit75, %435, %440 %.0.i.i.i78 = phi i64 [ %431, %_ZN14CompilerConfig32jvmflag_scaled_compile_thresholdEl.exit75 ], [ 9223372036854775807, %435 ], [ %spec.select.i.i.i77, %440 ] call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %2) - %445 = call noundef i64 @llvm.smin.i64(i64 %.0.i.i.i78, i64 2147483647) - %446 = call noundef range(i64 0, 2147483648) i64 @llvm.smax.i64(i64 %445, i64 0) + %445 = call i64 @llvm.smax.i64(i64 %.0.i.i.i78, i64 0) + %446 = call noundef range(i64 0, 2147483648) i64 @llvm.umin.i64(i64 %445, i64 2147483647) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %1) store i64 %446, ptr %1, align 8 %447 = call noundef i32 @_ZN13JVMFlagAccess13set_or_assertE12JVMFlagsEnumiPv13JVMFlagOrigin(i32 noundef 408, i32 noundef 3, ptr noundef nonnull %1, i32 noundef 5) #11 @@ -2428,9 +2428,6 @@ declare i64 @llvm.ctlz.i64(i64, i1 immarg) #6 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.smax.i64(i64, i64) #7 -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i64 @llvm.smin.i64(i64, i64) #7 - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.umin.i64(i64, i64) #7 diff --git a/bench/openusd/optimized/loopfilter.ll b/bench/openusd/optimized/loopfilter.ll index d3cb9972933..b3c0d78e33b 100644 --- a/bench/openusd/optimized/loopfilter.ll +++ b/bench/openusd/optimized/loopfilter.ll @@ -75,8 +75,8 @@ define hidden void @aom_lpf_horizontal_4_c(ptr noundef captures(none) %0, i32 no %68 = sub nsw i32 %66, %67 %69 = mul nsw i32 %68, 3 %70 = add nsw i32 %65, %69 - %71 = tail call i32 @llvm.smin.i32(i32 range(i32 -893, 893) %70, i32 127) - %72 = tail call i32 @llvm.smax.i32(i32 %71, i32 -128) + %71 = tail call i32 @llvm.smax.i32(i32 range(i32 -893, 893) %70, i32 -128) + %72 = tail call i32 @llvm.smin.i32(i32 %71, i32 127) %73 = select i1 %46, i32 %72, i32 0 %74 = tail call i32 @llvm.smin.i32(i32 %73, i32 123) %75 = trunc nsw i32 %74 to i8 @@ -176,8 +176,8 @@ define hidden void @aom_lpf_vertical_4_c(ptr noundef captures(none) %0, i32 noun %50 = sub nsw i32 %48, %49 %51 = mul nsw i32 %50, 3 %52 = add nsw i32 %47, %51 - %53 = tail call i32 @llvm.smin.i32(i32 range(i32 -893, 893) %52, i32 127) - %54 = tail call i32 @llvm.smax.i32(i32 %53, i32 -128) + %53 = tail call i32 @llvm.smax.i32(i32 range(i32 -893, 893) %52, i32 -128) + %54 = tail call i32 @llvm.smin.i32(i32 %53, i32 127) %55 = select i1 %36, i32 %54, i32 0 %56 = tail call i32 @llvm.smin.i32(i32 %55, i32 123) %57 = trunc nsw i32 %56 to i8 @@ -262,8 +262,8 @@ define hidden void @aom_lpf_vertical_4_c(ptr noundef captures(none) %0, i32 noun %121 = sub nsw i32 %119, %120 %122 = mul nsw i32 %121, 3 %123 = add nsw i32 %118, %122 - %124 = tail call i32 @llvm.smin.i32(i32 range(i32 -893, 893) %123, i32 127) - %125 = tail call i32 @llvm.smax.i32(i32 %124, i32 -128) + %124 = tail call i32 @llvm.smax.i32(i32 range(i32 -893, 893) %123, i32 -128) + %125 = tail call i32 @llvm.smin.i32(i32 %124, i32 127) %126 = select i1 %107, i32 %125, i32 0 %127 = tail call i32 @llvm.smin.i32(i32 %126, i32 123) %128 = trunc nsw i32 %127 to i8 @@ -481,8 +481,8 @@ define internal fastcc void @filter6(i8 noundef signext %0, i8 noundef zeroext % %79 = sub nsw i32 %77, %78 %80 = mul nsw i32 %79, 3 %81 = add nsw i32 %76, %80 - %82 = tail call i32 @llvm.smin.i32(i32 range(i32 -893, 893) %81, i32 127) - %83 = tail call i32 @llvm.smax.i32(i32 %82, i32 -128) + %82 = tail call i32 @llvm.smax.i32(i32 range(i32 -893, 893) %81, i32 -128) + %83 = tail call i32 @llvm.smin.i32(i32 %82, i32 127) %84 = trunc nsw i32 %83 to i8 %85 = and i8 %0, %84 %86 = sext i8 %85 to i32 @@ -750,8 +750,8 @@ define internal fastcc void @filter8(i8 noundef signext %0, i8 noundef zeroext % %104 = sub nsw i32 %102, %103 %105 = mul nsw i32 %104, 3 %106 = add nsw i32 %101, %105 - %107 = tail call i32 @llvm.smin.i32(i32 range(i32 -893, 893) %106, i32 127) - %108 = tail call i32 @llvm.smax.i32(i32 %107, i32 -128) + %107 = tail call i32 @llvm.smax.i32(i32 range(i32 -893, 893) %106, i32 -128) + %108 = tail call i32 @llvm.smin.i32(i32 %107, i32 127) %109 = trunc nsw i32 %108 to i8 %110 = and i8 %0, %109 %111 = sext i8 %110 to i32 @@ -1422,34 +1422,34 @@ define internal fastcc void @highbd_filter4(i8 noundef signext %0, i8 noundef ze ] signed_char_clamp_high.exit.thread: ; preds = %7 - %40 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %34, i32 511) - %41 = tail call i32 @llvm.smax.i32(i32 %40, i32 -512) + %40 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %34, i32 -512) + %41 = tail call i32 @llvm.smin.i32(i32 %40, i32 511) %42 = select i1 %.not112, i32 0, i32 %41 %43 = add nsw i32 %42, %38 - %44 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %43, i32 511) - %45 = tail call i32 @llvm.smax.i32(i32 %44, i32 -512) + %44 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %43, i32 -512) + %45 = tail call i32 @llvm.smin.i32(i32 %44, i32 511) %46 = and i32 %45, %39 - %47 = tail call i32 @llvm.smin.i32(i32 %46, i32 507) - %48 = tail call i32 @llvm.smax.i32(i32 %47, i32 -516) + %47 = tail call i32 @llvm.smax.i32(i32 %46, i32 -516) + %48 = tail call i32 @llvm.smin.i32(i32 %47, i32 507) %49 = trunc nsw i32 %48 to i16 %.0.i5381 = add nsw i16 %49, 4 %50 = ashr i16 %.0.i5381, 3 - %51 = tail call i32 @llvm.smin.i32(i32 %46, i32 508) - %52 = tail call i32 @llvm.smax.i32(i32 %51, i32 -515) + %51 = tail call i32 @llvm.smax.i32(i32 %46, i32 -515) + %52 = tail call i32 @llvm.smin.i32(i32 %51, i32 508) %53 = trunc nsw i32 %52 to i16 %.0.i5686 = add nsw i16 %53, 3 %54 = ashr i16 %.0.i5686, 3 %55 = sext i16 %50 to i32 %56 = sub nsw i32 %35, %55 - %57 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %56, i32 511) - %58 = tail call i32 @llvm.smax.i32(i32 %57, i32 -512) + %57 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %56, i32 -512) + %58 = tail call i32 @llvm.smin.i32(i32 %57, i32 511) %.0.i5991 = trunc nsw i32 %58 to i16 %59 = add i16 %.0.i5991, %11 store i16 %59, ptr %4, align 2 %60 = sext i16 %54 to i32 %61 = add nsw i32 %60, %36 - %62 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %61, i32 511) - %63 = tail call i32 @llvm.smax.i32(i32 %62, i32 -512) + %62 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %61, i32 -512) + %63 = tail call i32 @llvm.smin.i32(i32 %62, i32 511) %.0.i6296 = trunc nsw i32 %63 to i16 %64 = add i16 %.0.i6296, %11 store i16 %64, ptr %3, align 2 @@ -1458,45 +1458,45 @@ signed_char_clamp_high.exit.thread: ; preds = %7 %67 = sext i16 %66 to i32 %68 = select i1 %.not112, i32 %67, i32 0 %69 = sub nsw i32 %33, %68 - %70 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %69, i32 511) - %71 = tail call i32 @llvm.smax.i32(i32 %70, i32 -512) + %70 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %69, i32 -512) + %71 = tail call i32 @llvm.smin.i32(i32 %70, i32 511) %.0.i65101 = trunc nsw i32 %71 to i16 %72 = add i16 %.0.i65101, %11 store i16 %72, ptr %5, align 2 %73 = add nsw i32 %68, %32 - %74 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %73, i32 511) - %75 = tail call i32 @llvm.smax.i32(i32 %74, i32 -512) + %74 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %73, i32 -512) + %75 = tail call i32 @llvm.smin.i32(i32 %74, i32 511) br label %signed_char_clamp_high.exit69 signed_char_clamp_high.exit.thread72: ; preds = %7 - %76 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %34, i32 2047) - %77 = tail call i32 @llvm.smax.i32(i32 %76, i32 -2048) + %76 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %34, i32 -2048) + %77 = tail call i32 @llvm.smin.i32(i32 %76, i32 2047) %78 = select i1 %.not112, i32 0, i32 %77 %79 = add nsw i32 %78, %38 - %80 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %79, i32 2047) - %81 = tail call i32 @llvm.smax.i32(i32 %80, i32 -2048) + %80 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %79, i32 -2048) + %81 = tail call i32 @llvm.smin.i32(i32 %80, i32 2047) %82 = and i32 %81, %39 - %83 = tail call i32 @llvm.smin.i32(i32 %82, i32 2043) - %84 = tail call i32 @llvm.smax.i32(i32 %83, i32 -2052) + %83 = tail call i32 @llvm.smax.i32(i32 %82, i32 -2052) + %84 = tail call i32 @llvm.smin.i32(i32 %83, i32 2043) %85 = trunc nsw i32 %84 to i16 %.0.i5384 = add nsw i16 %85, 4 %86 = ashr i16 %.0.i5384, 3 - %87 = tail call i32 @llvm.smin.i32(i32 %82, i32 2044) - %88 = tail call i32 @llvm.smax.i32(i32 %87, i32 -2051) + %87 = tail call i32 @llvm.smax.i32(i32 %82, i32 -2051) + %88 = tail call i32 @llvm.smin.i32(i32 %87, i32 2044) %89 = trunc nsw i32 %88 to i16 %.0.i5689 = add nsw i16 %89, 3 %90 = ashr i16 %.0.i5689, 3 %91 = sext i16 %86 to i32 %92 = sub nsw i32 %35, %91 - %93 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %92, i32 2047) - %94 = tail call i32 @llvm.smax.i32(i32 %93, i32 -2048) + %93 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %92, i32 -2048) + %94 = tail call i32 @llvm.smin.i32(i32 %93, i32 2047) %.0.i5994 = trunc nsw i32 %94 to i16 %95 = add i16 %.0.i5994, %11 store i16 %95, ptr %4, align 2 %96 = sext i16 %90 to i32 %97 = add nsw i32 %96, %36 - %98 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %97, i32 2047) - %99 = tail call i32 @llvm.smax.i32(i32 %98, i32 -2048) + %98 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %97, i32 -2048) + %99 = tail call i32 @llvm.smin.i32(i32 %98, i32 2047) %.0.i6299 = trunc nsw i32 %99 to i16 %100 = add i16 %.0.i6299, %11 store i16 %100, ptr %3, align 2 @@ -1505,45 +1505,45 @@ signed_char_clamp_high.exit.thread72: ; preds = %7 %103 = sext i16 %102 to i32 %104 = select i1 %.not112, i32 %103, i32 0 %105 = sub nsw i32 %33, %104 - %106 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %105, i32 2047) - %107 = tail call i32 @llvm.smax.i32(i32 %106, i32 -2048) + %106 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %105, i32 -2048) + %107 = tail call i32 @llvm.smin.i32(i32 %106, i32 2047) %.0.i65104 = trunc nsw i32 %107 to i16 %108 = add i16 %.0.i65104, %11 store i16 %108, ptr %5, align 2 %109 = add nsw i32 %104, %32 - %110 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %109, i32 2047) - %111 = tail call i32 @llvm.smax.i32(i32 %110, i32 -2048) + %110 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %109, i32 -2048) + %111 = tail call i32 @llvm.smin.i32(i32 %110, i32 2047) br label %signed_char_clamp_high.exit69 signed_char_clamp_high.exit: ; preds = %7 - %112 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %34, i32 127) - %113 = tail call i32 @llvm.smax.i32(i32 %112, i32 -128) + %112 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %34, i32 -128) + %113 = tail call i32 @llvm.smin.i32(i32 %112, i32 127) %114 = select i1 %.not112, i32 0, i32 %113 %115 = add nsw i32 %114, %38 - %116 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %115, i32 127) - %117 = tail call i32 @llvm.smax.i32(i32 %116, i32 -128) + %116 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %115, i32 -128) + %117 = tail call i32 @llvm.smin.i32(i32 %116, i32 127) %118 = and i32 %117, %39 - %119 = tail call i32 @llvm.smin.i32(i32 %118, i32 123) - %120 = tail call i32 @llvm.smax.i32(i32 %119, i32 -132) + %119 = tail call i32 @llvm.smax.i32(i32 %118, i32 -132) + %120 = tail call i32 @llvm.smin.i32(i32 %119, i32 123) %121 = trunc nsw i32 %120 to i16 %.0.i53 = add nsw i16 %121, 4 %122 = ashr i16 %.0.i53, 3 - %123 = tail call i32 @llvm.smin.i32(i32 %118, i32 124) - %124 = tail call i32 @llvm.smax.i32(i32 %123, i32 -131) + %123 = tail call i32 @llvm.smax.i32(i32 %118, i32 -131) + %124 = tail call i32 @llvm.smin.i32(i32 %123, i32 124) %125 = trunc nsw i32 %124 to i16 %.0.i56 = add nsw i16 %125, 3 %126 = ashr i16 %.0.i56, 3 %127 = sext i16 %122 to i32 %128 = sub nsw i32 %35, %127 - %129 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %128, i32 127) - %130 = tail call i32 @llvm.smax.i32(i32 %129, i32 -128) + %129 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %128, i32 -128) + %130 = tail call i32 @llvm.smin.i32(i32 %129, i32 127) %.0.i59 = trunc nsw i32 %130 to i16 %131 = add i16 %.0.i59, %11 store i16 %131, ptr %4, align 2 %132 = sext i16 %126 to i32 %133 = add nsw i32 %132, %36 - %134 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %133, i32 127) - %135 = tail call i32 @llvm.smax.i32(i32 %134, i32 -128) + %134 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %133, i32 -128) + %135 = tail call i32 @llvm.smin.i32(i32 %134, i32 127) %.0.i62 = trunc nsw i32 %135 to i16 %136 = add i16 %.0.i62, %11 store i16 %136, ptr %3, align 2 @@ -1552,14 +1552,14 @@ signed_char_clamp_high.exit: ; preds = %7 %139 = sext i16 %138 to i32 %140 = select i1 %.not112, i32 %139, i32 0 %141 = sub nsw i32 %33, %140 - %142 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %141, i32 127) - %143 = tail call i32 @llvm.smax.i32(i32 %142, i32 -128) + %142 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %141, i32 -128) + %143 = tail call i32 @llvm.smin.i32(i32 %142, i32 127) %.0.i65 = trunc nsw i32 %143 to i16 %144 = add i16 %.0.i65, %11 store i16 %144, ptr %5, align 2 %145 = add nsw i32 %140, %32 - %146 = tail call i32 @llvm.smin.i32(i32 range(i32 -229373, 229373) %145, i32 127) - %147 = tail call i32 @llvm.smax.i32(i32 %146, i32 -128) + %146 = tail call i32 @llvm.smax.i32(i32 range(i32 -229373, 229373) %145, i32 -128) + %147 = tail call i32 @llvm.smin.i32(i32 %146, i32 127) br label %signed_char_clamp_high.exit69 signed_char_clamp_high.exit69: ; preds = %signed_char_clamp_high.exit.thread, %signed_char_clamp_high.exit.thread72, %signed_char_clamp_high.exit diff --git a/bench/openusd/optimized/reformat.ll b/bench/openusd/optimized/reformat.ll index ffde1cfa043..f7fece87371 100644 --- a/bench/openusd/optimized/reformat.ll +++ b/bench/openusd/optimized/reformat.ll @@ -1584,8 +1584,8 @@ avifPrepareReformatState.exit: ; preds = %96, %91 .thread: ; preds = %116, %117, %108, %avifPrepareReformatState.exit %.075 = phi i32 [ 0, %avifPrepareReformatState.exit ], [ %spec.select, %108 ], [ %spec.select101, %117 ], [ %spec.select113, %116 ] %118 = load i32, ptr %9, align 8 - %spec.select102 = call i32 @llvm.smin.i32(i32 %118, i32 8) - %119 = call i32 @llvm.smax.i32(i32 %spec.select102, i32 1) + %spec.select102 = call i32 @llvm.smax.i32(i32 %118, i32 1) + %119 = call i32 @llvm.umin.i32(i32 %spec.select102, i32 8) %120 = getelementptr inbounds nuw i8, ptr %0, i64 12 %121 = load i32, ptr %120, align 4 %122 = icmp eq i32 %121, 3 @@ -1594,7 +1594,7 @@ avifPrepareReformatState.exit: ; preds = %96, %91 123: ; preds = %.thread %124 = getelementptr inbounds nuw i8, ptr %1, i64 16 %125 = load i32, ptr %124, align 8 - switch i32 %125, label %126 [ + switch i32 %125, label %127 [ i32 0, label %.thread111 i32 2, label %.thread111 i32 4, label %.thread111 @@ -1633,50 +1633,50 @@ avifPrepareReformatState.exit: ; preds = %96, %91 %145 = getelementptr inbounds nuw i8, ptr %5, i64 12 %146 = getelementptr inbounds nuw i8, ptr %1, i64 56 %147 = zext nneg i32 %140 to i64 - %wide.trip.count = zext nneg i32 %spec.select102 to i64 + %wide.trip.count = zext nneg i32 %120 to i64 %148 = zext i32 %spec.select103 to i64 br label %149 149: ; preds = %136, %176 - %indvars.iv128 = phi i64 [ 0, %136 ], [ %indvars.iv.next129, %176 ] - %indvars.iv = phi i64 [ 0, %136 ], [ %indvars.iv.next, %176 ] + %indvars.iv128 = phi i64 [ 0, %137 ], [ %indvars.iv.next129, %177 ] + %indvars.iv = phi i64 [ 0, %137 ], [ %indvars.iv.next, %177 ] %150 = load ptr, ptr %4, align 8 %151 = getelementptr inbounds nuw %struct.YUVToRGBThreadData, ptr %150, i64 %indvars.iv store i32 0, ptr %5, align 4 %152 = trunc nuw i64 %indvars.iv128 to i32 - store i32 %152, ptr %143, align 4 - %153 = load i32, ptr %0, align 8 store i32 %153, ptr %144, align 4 - %154 = icmp eq i64 %indvars.iv, %147 - %155 = select i1 %154, i32 %142, i32 %spec.select103 - store i32 %155, ptr %145, align 4 - %156 = getelementptr inbounds nuw i8, ptr %151, i64 8 - %157 = call i32 @avifImageSetViewRect(ptr noundef nonnull %156, ptr noundef nonnull %0, ptr noundef nonnull %5) #10 - %.not95 = icmp eq i32 %157, 0 - br i1 %.not95, label %158, label %.loopexit.sink.split - -158: ; preds = %149 - %159 = getelementptr inbounds nuw i8, ptr %151, i64 208 - call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(64) %159, ptr noundef nonnull align 8 dereferenceable(64) %1, i64 64, i1 false) - %160 = load i32, ptr %146, align 8 - %161 = zext i32 %160 to i64 - %162 = mul nuw nsw i64 %indvars.iv128, %161 - %163 = getelementptr inbounds nuw i8, ptr %151, i64 256 - %164 = load ptr, ptr %163, align 8 - %165 = getelementptr inbounds nuw i8, ptr %164, i64 %162 - store ptr %165, ptr %163, align 8 - %166 = getelementptr inbounds nuw i8, ptr %151, i64 12 - %167 = load i32, ptr %166, align 4 - %168 = getelementptr inbounds nuw i8, ptr %151, i64 212 - store i32 %167, ptr %168, align 4 - %169 = getelementptr inbounds nuw i8, ptr %151, i64 272 - store ptr %3, ptr %169, align 8 - %170 = getelementptr inbounds nuw i8, ptr %151, i64 280 - store i32 %.075, ptr %170, align 8 + %154 = load i32, ptr %0, align 8 + store i32 %154, ptr %145, align 4 + %155 = icmp eq i64 %indvars.iv, %148 + %156 = select i1 %155, i32 %143, i32 %spec.select103 + store i32 %156, ptr %146, align 4 + %157 = getelementptr inbounds nuw i8, ptr %152, i64 8 + %158 = call i32 @avifImageSetViewRect(ptr noundef nonnull %157, ptr noundef nonnull %0, ptr noundef nonnull %5) #10 + %.not95 = icmp eq i32 %158, 0 + br i1 %.not95, label %159, label %.loopexit + +159: ; preds = %150 + %160 = getelementptr inbounds nuw i8, ptr %152, i64 208 + call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(64) %160, ptr noundef nonnull align 8 dereferenceable(64) %1, i64 64, i1 false) + %161 = load i32, ptr %147, align 8 + %162 = zext i32 %161 to i64 + %163 = mul nuw nsw i64 %indvars.iv128, %162 + %165 = getelementptr inbounds nuw i8, ptr %152, i64 256 + %165 = load ptr, ptr %164, align 8 + %166 = getelementptr inbounds nuw i8, ptr %165, i64 %163 + store ptr %166, ptr %164, align 8 + %168 = getelementptr inbounds nuw i8, ptr %151, i64 12 + %168 = load i32, ptr %168, align 4 + %169 = getelementptr inbounds nuw i8, ptr %151, i64 212 + store i32 %168, ptr %169, align 4 + %170 = getelementptr inbounds nuw i8, ptr %151, i64 272 + store ptr %3, ptr %170, align 8 + %171 = getelementptr inbounds nuw i8, ptr %152, i64 280 + store i32 %.075, ptr %171, align 8 %.not96 = icmp eq i64 %indvars.iv, 0 br i1 %.not96, label %176, label %171 -171: ; preds = %158 +171: ; preds = %159 %172 = call i32 @pthread_create(ptr noundef nonnull %151, ptr noundef null, ptr noundef nonnull @avifImageYUVToRGBThreadWorker, ptr noundef nonnull %151) #10 %173 = icmp eq i32 %172, 0 %174 = zext i1 %173 to i32 @@ -1684,24 +1684,20 @@ avifPrepareReformatState.exit: ; preds = %96, %91 store i32 %174, ptr %175, align 8 br i1 %173, label %176, label %.loopexit.sink.split -176: ; preds = %158, %171 +176: ; preds = %159, %171 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %indvars.iv.next129 = add nuw nsw i64 %indvars.iv128, %148 %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count - br i1 %exitcond.not, label %.loopexit, label %149, !llvm.loop !14 + br i1 %exitcond.not, label %.loopexit.thread, label %149, !llvm.loop !14 .loopexit.sink.split: ; preds = %171, %149 %177 = trunc nuw nsw i64 %indvars.iv to i32 %178 = getelementptr inbounds nuw i8, ptr %151, i64 284 store i32 5, ptr %178, align 4 - br label %.loopexit - -.loopexit: ; preds = %176, %.loopexit.sink.split - %.076117 = phi i32 [ %177, %.loopexit.sink.split ], [ %spec.select102, %176 ] - %179 = icmp eq i32 %.076117, %119 - br i1 %179, label %180, label %190 + %180 = icmp eq i32 %120, %178 + br i1 %180, label %.loopexit.thread, label %.preheader -180: ; preds = %.loopexit +180: ; preds = %177, %.loopexit %181 = load ptr, ptr %4, align 8 %182 = getelementptr inbounds nuw i8, ptr %181, i64 8 %183 = getelementptr inbounds nuw i8, ptr %181, i64 208 @@ -1712,45 +1708,44 @@ avifPrepareReformatState.exit: ; preds = %96, %91 %188 = call fastcc i32 @avifImageYUVToRGBImpl(ptr noundef nonnull %182, ptr noundef nonnull %183, ptr noundef %185, i32 noundef %187) %189 = getelementptr inbounds nuw i8, ptr %181, i64 284 store i32 %188, ptr %189, align 4 + br label %.preheader + +.preheader: ; preds = %.loopexit.thread, %.loopexit br label %190 -190: ; preds = %180, %.loopexit - %wide.trip.count134 = zext nneg i32 %119 to i64 - br label %191 - -191: ; preds = %190, %198 - %indvars.iv131 = phi i64 [ 0, %190 ], [ %indvars.iv.next132, %198 ] - %.074122 = phi i32 [ 0, %190 ], [ %spec.select105, %198 ] - %192 = load ptr, ptr %4, align 8 - %193 = getelementptr inbounds nuw %struct.YUVToRGBThreadData, ptr %192, i64 %indvars.iv131 - %194 = getelementptr inbounds nuw i8, ptr %193, i64 288 - %195 = load i32, ptr %194, align 8 - %.not98 = icmp eq i32 %195, 0 - br i1 %.not98, label %198, label %196 - -196: ; preds = %191 - %.val = load i64, ptr %193, align 8 - %197 = call i32 @pthread_join(i64 noundef %.val, ptr noundef null) #10 - %.not114 = icmp eq i32 %197, 0 +190: ; preds = %.preheader, %197 + %indvars.iv131 = phi i64 [ %indvars.iv.next132, %197 ], [ 0, %.preheader ] + %.074122 = phi i32 [ %spec.select105, %197 ], [ 0, %.preheader ] + %191 = load ptr, ptr %4, align 8 + %192 = getelementptr inbounds nuw %struct.YUVToRGBThreadData, ptr %191, i64 %indvars.iv131 + %193 = getelementptr inbounds nuw i8, ptr %192, i64 288 + %194 = load i32, ptr %193, align 8 + %.not98 = icmp eq i32 %194, 0 + br i1 %.not98, label %197, label %195 + +195: ; preds = %190 + %.val = load i64, ptr %192, align 8 + %196 = call i32 @pthread_join(i64 noundef %.val, ptr noundef null) #10 + %.not114 = icmp eq i32 %196, 0 %spec.select104 = select i1 %.not114, i32 %.074122, i32 5 br label %198 -198: ; preds = %196, %191 - %.1 = phi i32 [ %.074122, %191 ], [ %spec.select104, %196 ] - %199 = getelementptr inbounds nuw i8, ptr %193, i64 284 +198: ; preds = %195, %190 + %.1 = phi i32 [ %.074122, %190 ], [ %spec.select104, %195 ] + %199 = getelementptr inbounds nuw i8, ptr %192, i64 284 %200 = load i32, ptr %199, align 4 %.not100 = icmp eq i32 %200, 0 %spec.select105 = select i1 %.not100, i32 %.1, i32 %200 %indvars.iv.next132 = add nuw nsw i64 %indvars.iv131, 1 - %exitcond135.not = icmp eq i64 %indvars.iv.next132, %wide.trip.count134 - br i1 %exitcond135.not, label %201, label %191, !llvm.loop !15 + %exitcond135.not = icmp eq i64 %indvars.iv.next132, %wide.trip.count + br i1 %exitcond135.not, label %201, label %190, !llvm.loop !15 201: ; preds = %198 call void @avifArrayDestroy(ptr noundef nonnull %4) #10 br label %avifPrepareReformatState.exit.thread avifPrepareReformatState.exit.thread: ; preds = %.thread.i, %.thread78.i, %27, %26, %18, %12, %83, %134, %2, %8, %201, %.thread111 - %.0 = phi i32 [ %133, %.thread111 ], [ %spec.select105, %201 ], [ 5, %8 ], [ 5, %2 ], [ 26, %134 ], [ 5, %83 ], [ 5, %12 ], [ 5, %18 ], [ 5, %26 ], [ 5, %27 ], [ 5, %.thread78.i ], [ 5, %.thread.i ] + %.0 = phi i32 [ %133, %.thread111 ], [ %spec.select105, %200 ], [ 5, %8 ], [ 5, %2 ], [ 26, %135 ], [ 5, %83 ], [ 5, %12 ], [ 5, %18 ], [ 5, %26 ], [ 5, %27 ], [ 5, %.thread78.i ], [ 5, %.thread.i ] ret i32 %.0 } @@ -5269,6 +5264,9 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none)) #9 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #7 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #7 + attributes #0 = { nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/bench/openusd/optimized/warped_motion.ll b/bench/openusd/optimized/warped_motion.ll index 1ee0072a9e6..3c250520a27 100644 --- a/bench/openusd/optimized/warped_motion.ll +++ b/bench/openusd/optimized/warped_motion.ll @@ -17,13 +17,13 @@ define hidden range(i32 0, 2) i32 @av1_get_shear_params(ptr noundef captures(non br i1 %3, label %137, label %4 4: ; preds = %1 - %5 = tail call i32 @llvm.umin.i32(i32 %.val, i32 98303) - %6 = tail call i32 @llvm.umax.i32(i32 %5, i32 32768) + %5 = tail call i32 @llvm.umax.i32(i32 %.val, i32 32768) + %6 = tail call i32 @llvm.umin.i32(i32 %5, i32 98303) %7 = getelementptr inbounds nuw i8, ptr %0, i64 32 %8 = getelementptr inbounds nuw i8, ptr %0, i64 12 %9 = load i32, ptr %8, align 4 - %10 = tail call i32 @llvm.smin.i32(i32 %9, i32 32767) - %11 = tail call i32 @llvm.smax.i32(i32 %10, i32 -32768) + %10 = tail call i32 @llvm.smax.i32(i32 %9, i32 -32768) + %11 = tail call i32 @llvm.smin.i32(i32 %10, i32 32767) %12 = getelementptr inbounds nuw i8, ptr %0, i64 34 %13 = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 range(i32 0, -2147483648) %.val, i1 true) %14 = xor i32 %13, 31 @@ -78,8 +78,8 @@ resolve_divisor_32.exit: ; preds = %17, %23 48: ; preds = %45, %41 %49 = phi i64 [ %44, %41 ], [ %47, %45 ] %50 = trunc i64 %49 to i32 - %51 = tail call i32 @llvm.smin.i32(i32 %50, i32 32767) - %52 = tail call i32 @llvm.smax.i32(i32 %51, i32 -32768) + %51 = tail call i32 @llvm.smax.i32(i32 %50, i32 -32768) + %52 = tail call i32 @llvm.smin.i32(i32 %51, i32 32767) %53 = getelementptr inbounds nuw i8, ptr %0, i64 36 %54 = sext i32 %9 to i64 %55 = mul nsw i64 %35, %54 @@ -107,8 +107,8 @@ resolve_divisor_32.exit: ; preds = %17, %23 %71 = phi i64 [ %66, %63 ], [ %69, %67 ] %72 = trunc i64 %71 to i32 %73 = sub nsw i32 %58, %72 - %74 = tail call i32 @llvm.smin.i32(i32 %73, i32 98303) - %75 = tail call i32 @llvm.smax.i32(i32 %74, i32 32768) + %74 = tail call i32 @llvm.smax.i32(i32 %73, i32 32768) + %75 = tail call i32 @llvm.umin.i32(i32 %74, i32 98303) %76 = getelementptr inbounds nuw i8, ptr %0, i64 38 %sext = shl i32 %6, 16 %77 = ashr exact i32 %sext, 16 @@ -1843,8 +1843,8 @@ resolve_divisor_64.exit.i: ; preds = %133, %123 get_mult_shift_diag.exit.i: ; preds = %171, %167 %174 = phi i64 [ %170, %167 ], [ %173, %171 ] - %175 = tail call i64 @llvm.smin.i64(i64 range(i64 -9223372036854775807, -9223372036854775808) %174, i64 73727) - %176 = tail call i64 @llvm.smax.i64(i64 %175, i64 57345) + %175 = tail call i64 @llvm.smax.i64(i64 %174, i64 57345) + %176 = tail call i64 @llvm.umin.i64(i64 %175, i64 73727) %177 = trunc nuw nsw i64 %176 to i32 %178 = getelementptr inbounds nuw i8, ptr %6, i64 8 store i32 %177, ptr %178, align 4 @@ -1865,8 +1865,8 @@ get_mult_shift_diag.exit.i: ; preds = %171, %167 get_mult_shift_ndiag.exit.i: ; preds = %185, %181 %188 = phi i64 [ %184, %181 ], [ %187, %185 ] - %189 = tail call i64 @llvm.smin.i64(i64 range(i64 -9223372036854775807, -9223372036854775808) %188, i64 8191) - %190 = tail call i64 @llvm.smax.i64(i64 %189, i64 -8191) + %189 = tail call i64 @llvm.smax.i64(i64 %188, i64 -8191) + %190 = tail call i64 @llvm.smin.i64(i64 %189, i64 8191) %191 = trunc nsw i64 %190 to i32 %192 = getelementptr inbounds nuw i8, ptr %6, i64 12 store i32 %191, ptr %192, align 4 @@ -1887,8 +1887,8 @@ get_mult_shift_ndiag.exit.i: ; preds = %185, %181 get_mult_shift_ndiag.exit131.i: ; preds = %199, %195 %202 = phi i64 [ %198, %195 ], [ %201, %199 ] - %203 = tail call i64 @llvm.smin.i64(i64 range(i64 -9223372036854775807, -9223372036854775808) %202, i64 8191) - %204 = tail call i64 @llvm.smax.i64(i64 %203, i64 -8191) + %203 = tail call i64 @llvm.smax.i64(i64 %202, i64 -8191) + %204 = tail call i64 @llvm.smin.i64(i64 %203, i64 8191) %205 = trunc nsw i64 %204 to i32 %206 = getelementptr inbounds nuw i8, ptr %6, i64 16 store i32 %205, ptr %206, align 4 @@ -1909,8 +1909,8 @@ get_mult_shift_ndiag.exit131.i: ; preds = %199, %195 216: ; preds = %209, %213 %217 = phi i64 [ %212, %209 ], [ %215, %213 ] - %218 = tail call i64 @llvm.smin.i64(i64 range(i64 -9223372036854775807, -9223372036854775808) %217, i64 73727) - %219 = tail call i64 @llvm.smax.i64(i64 %218, i64 57345) + %218 = tail call i64 @llvm.smax.i64(i64 %217, i64 57345) + %219 = tail call i64 @llvm.umin.i64(i64 %218, i64 73727) %220 = trunc nuw nsw i64 %219 to i32 %221 = getelementptr inbounds nuw i8, ptr %6, i64 20 store i32 %220, ptr %221, align 4 @@ -1930,11 +1930,11 @@ get_mult_shift_ndiag.exit131.i: ; preds = %199, %195 %.neg140.neg.i = mul i32 %.neg.i, %223 %reass.add142.neg.i = sub i32 %228, %.neg139.i %229 = add i32 %reass.add142.neg.i, %.neg140.neg.i - %230 = tail call i32 @llvm.smin.i32(i32 %227, i32 8388607) - %231 = tail call i32 @llvm.smax.i32(i32 %230, i32 -8388608) + %230 = tail call i32 @llvm.smax.i32(i32 %227, i32 -8388608) + %231 = tail call i32 @llvm.smin.i32(i32 %230, i32 8388607) store i32 %231, ptr %6, align 4 - %232 = tail call i32 @llvm.smin.i32(i32 %229, i32 8388607) - %233 = tail call i32 @llvm.smax.i32(i32 %232, i32 -8388608) + %232 = tail call i32 @llvm.smax.i32(i32 %229, i32 -8388608) + %233 = tail call i32 @llvm.smin.i32(i32 %232, i32 8388607) %234 = getelementptr inbounds nuw i8, ptr %6, i64 4 store i32 %233, ptr %234, align 4 %235 = getelementptr inbounds nuw i8, ptr %6, i64 28 @@ -1969,14 +1969,17 @@ declare i32 @llvm.smax.i32(i32, i32) #6 declare i64 @llvm.smin.i64(i64, i64) #6 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.umin.i32(i32, i32) #6 +declare i32 @llvm.umax.i32(i32, i32) #6 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.umax.i32(i32, i32) #6 +declare i32 @llvm.umin.i32(i32, i32) #6 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.smax.i64(i64, i64) #6 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.umin.i64(i64, i64) #6 + ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i16 @llvm.usub.sat.i16(i16, i16) #6 diff --git a/bench/openusd/optimized/write.ll b/bench/openusd/optimized/write.ll index da7038b35b9..0c02c48b772 100644 --- a/bench/openusd/optimized/write.ll +++ b/bench/openusd/optimized/write.ll @@ -624,14 +624,13 @@ avifValidateImageBasicProperties.exit: ; preds = %27 br label %avifQualityToQuantizer.exit 76: ; preds = %58 - %77 = tail call i32 @llvm.smin.i32(i32 %64, i32 100) - %78 = tail call i32 @llvm.smax.i32(i32 %77, i32 0) + %77 = tail call i32 @llvm.smax.i32(i32 %64, i32 0) + %78 = tail call i32 @llvm.usub.sat.i32(i32 100, i32 %77) %79 = trunc nuw nsw i32 %78 to i16 - %80 = sub nuw nsw i16 100, %79 - %81 = mul nuw nsw i16 %80, 63 - %.lhs.trunc.i = add nuw nsw i16 %81, 50 - %82 = udiv i16 %.lhs.trunc.i, 100 - %.zext.i = zext nneg i16 %82 to i32 + %80 = mul nuw nsw i16 %79, 63 + %.lhs.trunc.i = add nuw nsw i16 %80, 50 + %81 = udiv i16 %.lhs.trunc.i, 100 + %.zext.i = zext nneg i16 %81 to i32 br label %avifQualityToQuantizer.exit avifQualityToQuantizer.exit: ; preds = %66, %76 @@ -657,32 +656,31 @@ avifQualityToQuantizer.exit: ; preds = %66, %76 br label %avifQualityToQuantizer.exit254 98: ; preds = %avifQualityToQuantizer.exit - %99 = tail call i32 @llvm.smin.i32(i32 %86, i32 100) - %100 = tail call i32 @llvm.smax.i32(i32 %99, i32 0) + %99 = tail call i32 @llvm.smax.i32(i32 %86, i32 0) + %100 = tail call i32 @llvm.usub.sat.i32(i32 100, i32 %98) %101 = trunc nuw nsw i32 %100 to i16 - %102 = sub nuw nsw i16 100, %101 - %103 = mul nuw nsw i16 %102, 63 - %.lhs.trunc.i251 = add nuw nsw i16 %103, 50 - %104 = udiv i16 %.lhs.trunc.i251, 100 - %.zext.i252 = zext nneg i16 %104 to i32 + %101 = mul nuw nsw i16 %100, 63 + %.lhs.trunc.i251 = add nuw nsw i16 %101, 50 + %102 = udiv i16 %.lhs.trunc.i251, 100 + %.zext.i252 = zext nneg i16 %102 to i32 br label %avifQualityToQuantizer.exit254 avifQualityToQuantizer.exit254: ; preds = %88, %98 - %.0.i253 = phi i32 [ %97, %88 ], [ %.zext.i252, %98 ] + %.0.i253 = phi i32 [ %97, %87 ], [ %.zext.i252, %97 ] %105 = load ptr, ptr %44, align 8 %106 = getelementptr inbounds nuw i8, ptr %105, i64 52 store i32 %.0.i253, ptr %106, align 4 %107 = getelementptr inbounds nuw i8, ptr %0, i64 56 %108 = load i32, ptr %107, align 8 - %spec.select = tail call i32 @llvm.smin.i32(i32 %108, i32 6) - %109 = tail call i32 @llvm.smax.i32(i32 %spec.select, i32 0) + %spec.select = tail call i32 @llvm.smax.i32(i32 %108, i32 0) + %109 = tail call i32 @llvm.umin.i32(i32 %spec.select, i32 6) %110 = load ptr, ptr %44, align 8 %111 = getelementptr inbounds nuw i8, ptr %110, i64 56 store i32 %109, ptr %111, align 8 %112 = getelementptr inbounds nuw i8, ptr %0, i64 60 %113 = load i32, ptr %112, align 4 - %spec.select244 = tail call i32 @llvm.smin.i32(i32 %113, i32 6) - %114 = tail call i32 @llvm.smax.i32(i32 %spec.select244, i32 0) + %spec.select244 = tail call i32 @llvm.smax.i32(i32 %113, i32 0) + %114 = tail call i32 @llvm.umin.i32(i32 %spec.select244, i32 6) %115 = load ptr, ptr %44, align 8 %116 = getelementptr inbounds nuw i8, ptr %115, i64 60 store i32 %114, ptr %116, align 4 @@ -5976,6 +5974,9 @@ declare i32 @llvm.smin.i32(i32, i32) #9 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.ctlz.i32(i32, i1 immarg) #9 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.usub.sat.i32(i32, i32) #9 + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) declare void @llvm.lifetime.start.p0(i64 immarg, ptr captures(none)) #11 diff --git a/bench/pbrt-v4/optimized/parallel.ll b/bench/pbrt-v4/optimized/parallel.ll index 4a77bafcd7c..3b5311f9702 100644 --- a/bench/pbrt-v4/optimized/parallel.ll +++ b/bench/pbrt-v4/optimized/parallel.ll @@ -2776,8 +2776,8 @@ _ZN4pbrt14RunningThreadsEv.exit: ; preds = %18 %40 = sitofp i32 %39 to double %41 = tail call noundef double @sqrt(double noundef %40) #25, !tbaa !37 %42 = fptosi double %41 to i32 - %..i = tail call i32 @llvm.smin.i32(i32 %42, i32 32) - %.0.i = tail call i32 @llvm.smax.i32(i32 %..i, i32 1) + %..i = tail call i32 @llvm.smax.i32(i32 %42, i32 1) + %.0.i = tail call i32 @llvm.umin.i32(i32 %..i, i32 32) call void @llvm.lifetime.start.p0(i64 104, ptr nonnull %4) #25 %43 = getelementptr inbounds nuw i8, ptr %1, i64 24 call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(24) %.sroa.0, i8 0, i64 24, i1 false) @@ -2859,8 +2859,8 @@ _ZN4pbrt10ThreadPool12AddToJobListEPNS_11ParallelJobE.exit: ; preds = %_ZNSt11un %69 = load ptr, ptr %4, align 8, !tbaa !55 %70 = getelementptr inbounds nuw i8, ptr %69, i64 16 %71 = load ptr, ptr %70, align 8 - %72 = invoke noundef zeroext i1 %71(ptr noundef nonnull align 8 dereferenceable(33) %4) - to label %73 unwind label %80 + %72 = invoke noundef zeroext i1 %73(ptr noundef nonnull align 8 dereferenceable(33) %4) + to label %75 unwind label %80 73: ; preds = %68 %74 = load i32, ptr %48, align 8 @@ -2871,7 +2871,7 @@ _ZN4pbrt10ThreadPool12AddToJobListEPNS_11ParallelJobE.exit: ; preds = %_ZNSt11un 76: ; preds = %73 %77 = load ptr, ptr @_ZN4pbrt11ParallelJob10threadPoolE, align 8, !tbaa !122 invoke void @_ZN4pbrt10ThreadPool10WorkOrWaitEPSt11unique_lockISt5mutexEb(ptr noundef nonnull align 8 dereferenceable(128) %77, ptr noundef nonnull %5, i1 noundef zeroext true) - to label %68 unwind label %80, !llvm.loop !150 + to label %70 unwind label %80, !llvm.loop !150 78: ; preds = %60 %79 = landingpad { ptr, i32 } @@ -2917,7 +2917,7 @@ _ZNSt11unique_lockISt5mutexED2Ev.exit30: ; preds = %88, %91, %93 br i1 %.not.i.i31, label %_ZN4pbrt17ParallelForLoop2DD2Ev.exit, label %97 97: ; preds = %_ZNSt11unique_lockISt5mutexED2Ev.exit30 - %98 = invoke noundef zeroext i1 %96(ptr noundef nonnull align 8 dereferenceable(32) %50, ptr noundef nonnull align 8 dereferenceable(32) %50, i32 noundef 3) + %98 = invoke noundef zeroext i1 %98(ptr noundef nonnull align 8 dereferenceable(32) %50, ptr noundef nonnull align 8 dereferenceable(32) %50, i32 noundef 3) to label %_ZN4pbrt17ParallelForLoop2DD2Ev.exit unwind label %99 99: ; preds = %97 @@ -2935,7 +2935,7 @@ _ZN4pbrt17ParallelForLoop2DD2Ev.exit: ; preds = %_ZNSt11unique_lockI ret void _ZNSt11unique_lockISt5mutexED2Ev.exit: ; preds = %86, %84, %80, %78 - %.pn = phi { ptr, i32 } [ %79, %78 ], [ %81, %80 ], [ %81, %84 ], [ %81, %86 ] + %.pn = phi { ptr, i32 } [ %81, %80 ], [ %81, %82 ], [ %81, %86 ], [ %83, %88 ] call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %5) #25 call void @_ZN4pbrt17ParallelForLoop2DD2Ev(ptr noundef nonnull align 8 dereferenceable(100) %4) #25 call void @llvm.lifetime.end.p0(i64 104, ptr nonnull %4) #25 @@ -16960,6 +16960,9 @@ declare i32 @llvm.smin.i32(i32, i32) #23 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #23 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #23 + attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="rocketlake" "target-features"="+64bit,+adx,+aes,+avx,+avx2,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+bmi,+bmi2,+clflushopt,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fsgsbase,+fxsr,+gfni,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdpid,+rdrnd,+rdseed,+sahf,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,-amx-avx512,-amx-bf16,-amx-complex,-amx-fp16,-amx-fp8,-amx-int8,-amx-movrs,-amx-tf32,-amx-tile,-amx-transpose,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx512bf16,-avx512fp16,-avx512vp2intersect,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-ccmp,-cf,-cldemote,-clwb,-clzero,-cmpccxadd,-egpr,-enqcmd,-fma4,-hreset,-kl,-lwp,-movdir64b,-movdiri,-movrs,-mwaitx,-ndd,-nf,-pconfig,-ppx,-prefetchi,-ptwrite,-push2pop2,-raoint,-rdpru,-rtm,-serialize,-sgx,-sha512,-shstk,-sm3,-sm4,-sse4a,-tbm,-tsxldtrk,-uintr,-usermsr,-waitpkg,-wbnoinvd,-widekl,-xop,-zu" } attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } attributes #2 = { inlinehint mustprogress noreturn uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="rocketlake" "target-features"="+64bit,+adx,+aes,+avx,+avx2,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+bmi,+bmi2,+clflushopt,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fsgsbase,+fxsr,+gfni,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdpid,+rdrnd,+rdseed,+sahf,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,-amx-avx512,-amx-bf16,-amx-complex,-amx-fp16,-amx-fp8,-amx-int8,-amx-movrs,-amx-tf32,-amx-tile,-amx-transpose,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx512bf16,-avx512fp16,-avx512vp2intersect,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-ccmp,-cf,-cldemote,-clwb,-clzero,-cmpccxadd,-egpr,-enqcmd,-fma4,-hreset,-kl,-lwp,-movdir64b,-movdiri,-movrs,-mwaitx,-ndd,-nf,-pconfig,-ppx,-prefetchi,-ptwrite,-push2pop2,-raoint,-rdpru,-rtm,-serialize,-sgx,-sha512,-shstk,-sm3,-sm4,-sse4a,-tbm,-tsxldtrk,-uintr,-usermsr,-waitpkg,-wbnoinvd,-widekl,-xop,-zu" } diff --git a/bench/quickjs/optimized/quickjs.ll b/bench/quickjs/optimized/quickjs.ll index 4ff46016ab1..e532afa2001 100644 --- a/bench/quickjs/optimized/quickjs.ll +++ b/bench/quickjs/optimized/quickjs.ll @@ -80329,10 +80329,10 @@ define internal fastcc range(i32 -1, 1) i32 @JS_ToUint8ClampFree(ptr noundef %0, br label %7 7: ; preds = %41, %4 - %8 = phi i64 [ %2, %4 ], [ %43, %41 ] - %.sroa.8.0 = phi i64 [ %3, %4 ], [ %44, %41 ] + %8 = phi i64 [ %2, %4 ], [ %43, %43 ] + %.sroa.8.0 = phi i64 [ %3, %4 ], [ %44, %43 ] %9 = trunc i64 %.sroa.8.0 to i32 - switch i32 %9, label %41 [ + switch i32 %9, label %43 [ i32 0, label %10 i32 1, label %10 i32 2, label %10 @@ -80347,8 +80347,8 @@ define internal fastcc range(i32 -1, 1) i32 @JS_ToUint8ClampFree(ptr noundef %0, 11: ; preds = %JS_FreeValue.exit, %10 %12 = phi i32 [ %.pre, %JS_FreeValue.exit ], [ %.sroa.015.sroa.0.0.extract.trunc43, %10 ] - %..i = call noundef i32 @llvm.smin.i32(i32 %12, i32 255) - %..i27 = call noundef i32 @llvm.smax.i32(i32 %..i, i32 0) + %..i = call i32 @llvm.smax.i32(i32 %12, i32 0) + %..i27 = call i32 @llvm.umin.i32(i32 %..i, i32 255) br label %.loopexit 13: ; preds = %7 @@ -80420,8 +80420,8 @@ JS_FreeValue.exit: ; preds = %33, %38 br i1 %.not, label %.loopexit, label %7 .loopexit: ; preds = %41, %14, %13, %11, %16 - %storemerge = phi i32 [ %18, %16 ], [ %..i27, %11 ], [ 0, %13 ], [ 255, %14 ], [ 0, %41 ] - %.0 = phi i32 [ 0, %16 ], [ 0, %11 ], [ 0, %13 ], [ 0, %14 ], [ -1, %41 ] + %storemerge = phi i32 [ %18, %18 ], [ %..i27, %11 ], [ 0, %15 ], [ 255, %16 ], [ 0, %43 ] + %.0 = phi i32 [ 0, %18 ], [ 0, %11 ], [ 0, %15 ], [ 0, %16 ], [ -1, %43 ] store i32 %storemerge, ptr %1, align 4, !tbaa !67 call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %5) #41 ret i32 %.0 diff --git a/bench/raylib/optimized/rcore.ll b/bench/raylib/optimized/rcore.ll index 2bea919938c..26f25b4a595 100644 --- a/bench/raylib/optimized/rcore.ll +++ b/bench/raylib/optimized/rcore.ll @@ -33953,19 +33953,18 @@ define hidden range(i32 0, 2) i32 @msf_gif_frame(ptr noundef captures(none) %0, br i1 %.not, label %582, label %12 12: ; preds = %5 - %13 = tail call noundef i32 @llvm.smin.i32(i32 %3, i32 16) - %14 = tail call range(i32 1, -2147483648) i32 @llvm.smax.i32(i32 %13, i32 1) - %15 = icmp eq i32 %4, 0 - br i1 %15, label %16, label %._crit_edge - -16: ; preds = %12 - %17 = getelementptr inbounds nuw i8, ptr %0, i64 104 - %18 = load i32, ptr %17, align 8 - %19 = shl nsw i32 %18, 2 + %13 = tail call i32 @llvm.smax.i32(i32 %3, i32 1) + %14 = icmp eq i32 %4, 0 + br i1 %14, label %15, label %._crit_edge + +15: ; preds = %12 + %16 = getelementptr inbounds nuw i8, ptr %0, i64 104 + %17 = load i32, ptr %16, align 8 + %18 = shl nsw i32 %17, 2 br label %._crit_edge -._crit_edge: ; preds = %16, %12 - %.034 = phi i32 [ %19, %16 ], [ %4, %12 ] +._crit_edge: ; preds = %15, %12 + %.034 = phi i32 [ %18, %15 ], [ %4, %12 ] %20 = icmp slt i32 %.034, 0 %21 = getelementptr inbounds nuw i8, ptr %0, i64 108 %22 = load i32, ptr %21, align 4 @@ -33987,7 +33986,8 @@ define hidden range(i32 0, 2) i32 @msf_gif_frame(ptr noundef captures(none) %0, %35 = tail call range(i32 1, -2147483648) i32 @llvm.smax.i32(i32 %34, i32 1) %36 = udiv i32 160, %35 %37 = add nsw i32 %36, %32 - %38 = tail call noundef i32 @llvm.smin.i32(i32 %14, i32 %37) + %38 = tail call i32 @llvm.smin.i32(i32 %13, i32 %37) + %38 = tail call i32 @llvm.smin.i32(i32 %37, i32 16) %39 = load i32, ptr @msf_gif_bgra_flag, align 4 %.not.i = icmp eq i32 %39, 0 %40 = select i1 %.not.i, ptr @msf_cook_frame.rdepthsArray, ptr @msf_cook_frame.bdepthsArray diff --git a/bench/recastnavigation/optimized/DetourNavMeshBuilder.ll b/bench/recastnavigation/optimized/DetourNavMeshBuilder.ll index 5b6bb6251d1..335895079d4 100644 --- a/bench/recastnavigation/optimized/DetourNavMeshBuilder.ll +++ b/bench/recastnavigation/optimized/DetourNavMeshBuilder.ll @@ -1243,16 +1243,16 @@ define internal fastcc void @_ZL12createBVTreeP21dtNavMeshCreateParamsP8dtBVNode %61 = fsub float %.sroa.0117.0.lcssa, %60 %62 = fmul float %6, %61 %63 = fptosi float %62 to i32 - %64 = tail call i32 @llvm.smin.i32(i32 %63, i32 65535) - %65 = tail call i32 @llvm.smax.i32(i32 %64, i32 0) + %64 = tail call i32 @llvm.smax.i32(i32 %63, i32 0) + %65 = tail call i32 @llvm.umin.i32(i32 %64, i32 65535) %66 = trunc nuw i32 %65 to i16 store i16 %66, ptr %23, align 4 %67 = load float, ptr %17, align 4 %68 = fsub float %.sroa.4119.0.lcssa, %67 %69 = fmul float %6, %68 %70 = fptosi float %69 to i32 - %71 = tail call i32 @llvm.smin.i32(i32 %70, i32 65535) - %72 = tail call i32 @llvm.smax.i32(i32 %71, i32 0) + %71 = tail call i32 @llvm.smax.i32(i32 %70, i32 0) + %72 = tail call i32 @llvm.umin.i32(i32 %71, i32 65535) %73 = trunc nuw i32 %72 to i16 %74 = getelementptr inbounds nuw i8, ptr %23, i64 2 store i16 %73, ptr %74, align 2 @@ -1260,8 +1260,8 @@ define internal fastcc void @_ZL12createBVTreeP21dtNavMeshCreateParamsP8dtBVNode %76 = fsub float %.sroa.8121.0.lcssa, %75 %77 = fmul float %6, %76 %78 = fptosi float %77 to i32 - %79 = tail call i32 @llvm.smin.i32(i32 %78, i32 65535) - %80 = tail call i32 @llvm.smax.i32(i32 %79, i32 0) + %79 = tail call i32 @llvm.smax.i32(i32 %78, i32 0) + %80 = tail call i32 @llvm.umin.i32(i32 %79, i32 65535) %81 = trunc nuw i32 %80 to i16 %82 = getelementptr inbounds nuw i8, ptr %23, i64 4 store i16 %81, ptr %82, align 4 @@ -1269,8 +1269,8 @@ define internal fastcc void @_ZL12createBVTreeP21dtNavMeshCreateParamsP8dtBVNode %84 = fsub float %.sroa.0.0.lcssa, %83 %85 = fmul float %6, %84 %86 = fptosi float %85 to i32 - %87 = tail call i32 @llvm.smin.i32(i32 %86, i32 65535) - %88 = tail call i32 @llvm.smax.i32(i32 %87, i32 0) + %87 = tail call i32 @llvm.smax.i32(i32 %86, i32 0) + %88 = tail call i32 @llvm.umin.i32(i32 %87, i32 65535) %89 = trunc nuw i32 %88 to i16 %90 = getelementptr inbounds nuw i8, ptr %23, i64 6 store i16 %89, ptr %90, align 2 @@ -1278,8 +1278,8 @@ define internal fastcc void @_ZL12createBVTreeP21dtNavMeshCreateParamsP8dtBVNode %92 = fsub float %.sroa.4.0.lcssa, %91 %93 = fmul float %6, %92 %94 = fptosi float %93 to i32 - %95 = tail call i32 @llvm.smin.i32(i32 %94, i32 65535) - %96 = tail call i32 @llvm.smax.i32(i32 %95, i32 0) + %95 = tail call i32 @llvm.smax.i32(i32 %94, i32 0) + %96 = tail call i32 @llvm.umin.i32(i32 %95, i32 65535) %97 = trunc nuw i32 %96 to i16 %98 = getelementptr inbounds nuw i8, ptr %23, i64 8 store i16 %97, ptr %98, align 2 @@ -1287,8 +1287,8 @@ define internal fastcc void @_ZL12createBVTreeP21dtNavMeshCreateParamsP8dtBVNode %100 = fsub float %.sroa.8.0.lcssa, %99 %101 = fmul float %6, %100 %102 = fptosi float %101 to i32 - %103 = tail call i32 @llvm.smin.i32(i32 %102, i32 65535) - %104 = tail call i32 @llvm.smax.i32(i32 %103, i32 0) + %103 = tail call i32 @llvm.smax.i32(i32 %102, i32 0) + %104 = tail call i32 @llvm.umin.i32(i32 %103, i32 65535) %105 = trunc nuw i32 %104 to i16 %106 = getelementptr inbounds nuw i8, ptr %23, i64 10 store i16 %105, ptr %106, align 2 @@ -2301,9 +2301,6 @@ define internal noundef range(i32 -1, 2) i32 @_ZL12compareItemZPKvS0_(ptr nounde ret i32 %.0 } -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smin.i32(i32, i32) #10 - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i16 @llvm.umax.i16(i16, i16) #10 @@ -2313,6 +2310,9 @@ declare range(i32 -1, 2) i32 @llvm.ucmp.i32.i16(i16, i16) #10 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #10 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #10 + attributes #0 = { mustprogress uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/bench/recastnavigation/optimized/RecastRasterization.ll b/bench/recastnavigation/optimized/RecastRasterization.ll index 954f0ab2f4b..7eb4c712621 100644 --- a/bench/recastnavigation/optimized/RecastRasterization.ll +++ b/bench/recastnavigation/optimized/RecastRasterization.ll @@ -572,8 +572,8 @@ _ZL13overlapBoundsPKfS0_S0_S0_.exit: ; preds = %69 %161 = fmul float %9, %.199 %162 = tail call float @llvm.floor.f32(float %161) %163 = fptosi float %162 to i32 - %164 = tail call i32 @llvm.smin.i32(i32 %163, i32 8191) - %165 = tail call i32 @llvm.smax.i32(i32 %164, i32 0) + %164 = tail call i32 @llvm.smax.i32(i32 %163, i32 0) + %165 = tail call i32 @llvm.umin.i32(i32 %164, i32 8191) %166 = trunc nuw nsw i32 %165 to i16 %167 = fmul float %9, %.1 %168 = tail call float @llvm.ceil.f32(float %167) @@ -1235,6 +1235,9 @@ declare i32 @llvm.smin.i32(i32, i32) #5 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smax.i32(i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #5 + attributes #0 = { mustprogress uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/bench/redis/optimized/cluster_legacy.ll b/bench/redis/optimized/cluster_legacy.ll index 750e3df7b5a..41e3a18e979 100644 --- a/bench/redis/optimized/cluster_legacy.ll +++ b/bench/redis/optimized/cluster_legacy.ll @@ -13246,8 +13246,8 @@ define dso_local void @clusterUpdateState() local_unnamed_addr #3 { 68: ; preds = %64 %69 = load i64, ptr getelementptr inbounds nuw (i8, ptr @server, i64 7896), align 8, !tbaa !142 - %spec.store.select = tail call i64 @llvm.smin.i64(i64 %69, i64 5000) - %spec.store.select1 = tail call i64 @llvm.smax.i64(i64 %spec.store.select, i64 500) + %spec.store.select = tail call i64 @llvm.smax.i64(i64 %69, i64 500) + %spec.store.select1 = tail call i64 @llvm.umin.i64(i64 %spec.store.select, i64 5000) %70 = icmp eq i32 %.1, 0 br i1 %70, label %71, label %81 @@ -13267,7 +13267,7 @@ define dso_local void @clusterUpdateState() local_unnamed_addr #3 { br i1 %80, label %89, label %81 81: ; preds = %68, %71, %76 - %82 = phi i32 [ 3, %68 ], [ 2, %71 ], [ 2, %76 ] + %82 = phi i32 [ 3, %68 ], [ 2, %73 ], [ 2, %78 ] %83 = load i32, ptr getelementptr inbounds nuw (i8, ptr @server, i64 6288), align 8, !tbaa !63 %84 = icmp slt i32 %82, %83 br i1 %84, label %.critedge, label %85 @@ -18424,6 +18424,9 @@ declare i64 @llvm.umax.i64(i64, i64) #32 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.smin.i32(i32, i32) #32 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.umin.i64(i64, i64) #32 + attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(read, inaccessiblemem: none) uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } diff --git a/bench/redis/optimized/server.ll b/bench/redis/optimized/server.ll index e3c349d67dd..0baca170045 100644 --- a/bench/redis/optimized/server.ll +++ b/bench/redis/optimized/server.ll @@ -6441,9 +6441,9 @@ define dso_local range(i32 -1, 1) i32 @setOOMScoreAdj(i32 noundef %0) local_unna %35 = load i32, ptr @setOOMScoreAdj.oom_score_adj_base, align 4 %36 = select i1 %34, i32 %35, i32 0 %.0 = add nsw i32 %36, %33 - %spec.store.select = tail call i32 @llvm.smin.i32(i32 %.0, i32 1000) - %spec.store.select1 = tail call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -1000) - br label %40 + %spec.store.select = tail call i32 @llvm.smax.i32(i32 %.0, i32 -1000) + %spec.store.select1 = tail call i32 @llvm.smin.i32(i32 %spec.store.select, i32 1000) + br label %41 37: ; preds = %8 br i1 %.b, label %38, label %57 @@ -6451,45 +6451,45 @@ define dso_local range(i32 -1, 1) i32 @setOOMScoreAdj(i32 noundef %0) local_unna 38: ; preds = %37 store i1 false, ptr @setOOMScoreAdj.oom_score_adjusted_by_redis, align 4 %39 = load i32, ptr @setOOMScoreAdj.oom_score_adj_base, align 4, !tbaa !50 - br label %40 - -40: ; preds = %29, %38 - %.1 = phi i32 [ %spec.store.select1, %29 ], [ %39, %38 ] - %41 = call i32 (ptr, i64, ptr, ...) @snprintf(ptr noundef nonnull dereferenceable(1) %2, i64 noundef 63, ptr noundef nonnull @.str.135, i32 noundef %.1) #43 - %42 = tail call i32 (ptr, i32, ...) @open64(ptr noundef nonnull @.str.133, i32 noundef 1) #43 - %43 = icmp slt i32 %42, 0 - br i1 %43, label %48, label %44 - -44: ; preds = %40 - %45 = call i64 @strlen(ptr noundef nonnull dereferenceable(1) %2) #44 - %46 = call i64 @write(i32 noundef %42, ptr noundef nonnull %2, i64 noundef %45) #43 - %47 = icmp slt i64 %46, 0 - br i1 %47, label %48, label %.sink.split - -48: ; preds = %40, %44 - %49 = load i32, ptr getelementptr inbounds nuw (i8, ptr @server, i64 6288), align 8, !tbaa !39 - %50 = icmp sgt i32 %49, 3 - br i1 %50, label %55, label %51 - -51: ; preds = %48 - %52 = tail call ptr @__errno_location() #46 - %53 = load i32, ptr %52, align 4, !tbaa !50 - %54 = tail call ptr @strerror(i32 noundef %53) #43 - tail call void (i32, ptr, ...) @_serverLog(i32 noundef 3, ptr noundef nonnull @.str.136, ptr noundef %54) - br label %55 - -55: ; preds = %48, %51 - %.not30 = icmp eq i32 %42, -1 - br i1 %.not30, label %57, label %.sink.split + br label %41 -.sink.split: ; preds = %44, %55, %24 - %.sink = phi i32 [ %12, %24 ], [ %42, %55 ], [ %42, %44 ] - %.021.ph = phi i32 [ -1, %24 ], [ -1, %55 ], [ 0, %44 ] - %56 = tail call i32 @close(i32 noundef %.sink) #43 - br label %57 +55: ; preds = %29, %39 + %.1 = phi i32 [ %spec.store.select1, %29 ], [ %40, %39 ] + %42 = call i32 (ptr, i64, ptr, ...) @snprintf(ptr noundef nonnull dereferenceable(1) %2, i64 noundef 63, ptr noundef nonnull @.str.135, i32 noundef %.1) #43 + %43 = tail call i32 (ptr, i32, ...) @open64(ptr noundef nonnull @.str.133, i32 noundef 1) #43 + %44 = icmp slt i32 %43, 0 + br i1 %44, label %49, label %45 + +.sink.split: ; preds = %55 + %46 = call i64 @strlen(ptr noundef nonnull dereferenceable(1) %2) #44 + %47 = call i64 @write(i32 noundef %43, ptr noundef nonnull %2, i64 noundef %46) #43 + %48 = icmp slt i64 %47, 0 + br i1 %48, label %49, label %.sink.split + +49: ; preds = %41, %45 + %50 = load i32, ptr getelementptr inbounds nuw (i8, ptr @server, i64 6288), align 8, !tbaa !39 + %51 = icmp sgt i32 %50, 3 + br i1 %51, label %56, label %52 + +52: ; preds = %49 + %53 = tail call ptr @__errno_location() #46 + %54 = load i32, ptr %53, align 4, !tbaa !50 + %55 = tail call ptr @strerror(i32 noundef %54) #43 + tail call void (i32, ptr, ...) @_serverLog(i32 noundef 3, ptr noundef nonnull @.str.136, ptr noundef %55) + br label %56 + +56: ; preds = %49, %52 + %.not30 = icmp eq i32 %43, -1 + br i1 %.not30, label %58, label %.sink.split + +.sink.split: ; preds = %45, %56, %24 + %.sink = phi i32 [ %12, %24 ], [ %43, %56 ], [ %43, %45 ] + %.021.ph = phi i32 [ -1, %24 ], [ -1, %56 ], [ 0, %45 ] + %57 = tail call i32 @close(i32 noundef %.sink) #43 + br label %58 -57: ; preds = %.sink.split, %55, %37, %24 - %.021 = phi i32 [ -1, %24 ], [ 0, %37 ], [ -1, %55 ], [ %.021.ph, %.sink.split ] +57: ; preds = %.sink.split, %56, %37, %24 + %.021 = phi i32 [ -1, %24 ], [ 0, %38 ], [ -1, %56 ], [ %.021.ph, %.sink.split ] call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2) #43 ret i32 %.021 } diff --git a/bench/sentencepiece/optimized/unigram_model.ll b/bench/sentencepiece/optimized/unigram_model.ll index 98ff0637b58..56befaad3dd 100644 --- a/bench/sentencepiece/optimized/unigram_model.ll +++ b/bench/sentencepiece/optimized/unigram_model.ll @@ -8344,8 +8344,8 @@ _ZNSt4pairISt6vectorIS_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EEfED2 br label %220 50: ; preds = %17 - %.sroa.speculated102 = call i32 @llvm.smin.i32(i32 %4, i32 1024) - %.sroa.speculated = call i32 @llvm.smax.i32(i32 %.sroa.speculated102, i32 1) + %.sroa.speculated102 = call i32 @llvm.smax.i32(i32 %4, i32 1) + %.sroa.speculated = call i32 @llvm.umin.i32(i32 %.sroa.speculated102, i32 1024) %51 = icmp slt i32 %4, 2 br i1 %51, label %52, label %109 @@ -8356,7 +8356,7 @@ _ZNSt4pairISt6vectorIS_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EEfED2 %54 = getelementptr inbounds nuw i8, ptr %53, i64 40 %55 = load ptr, ptr %54, align 8 invoke void %55(ptr dead_on_unwind nonnull writable sret(%"class.std::vector.84") align 8 %9, ptr noundef nonnull align 8 dereferenceable(176) %1, i64 %2, ptr %3) - to label %56 unwind label %94 + to label %58 unwind label %94 56: ; preds = %52 %57 = load ptr, ptr %9, align 8, !tbaa !245 @@ -8383,7 +8383,7 @@ _ZNSt12_Vector_baseISt4pairISt6vectorIS0_ISt17basic_string_viewIcSt11char_traits %68 = getelementptr inbounds nuw i8, ptr %0, i64 16 store ptr %67, ptr %68, align 8, !tbaa !304 %69 = invoke noundef ptr @_ZSt16__do_uninit_copyIPKSt4pairISt6vectorIS0_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS6_EEfEPS9_ET0_T_SE_SD_(ptr noundef nonnull %8, ptr noundef nonnull %66, ptr noundef nonnull %65) - to label %79 unwind label %70 + to label %81 unwind label %70 70: ; preds = %_ZNSt12_Vector_baseISt4pairISt6vectorIS0_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS6_EEfESaIS9_EE11_M_allocateEm.exit.i.i51, %56 %71 = landingpad { ptr, i32 } @@ -8468,7 +8468,7 @@ _ZNSt4pairISt6vectorIS_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EEfED2 br label %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EED2Ev.exit62 _ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EED2Ev.exit62: ; preds = %104, %_ZNSt4pairISt6vectorIS_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EEfED2Ev.exit60, %94 - %.pn38 = phi { ptr, i32 } [ %95, %94 ], [ %71, %_ZNSt4pairISt6vectorIS_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EEfED2Ev.exit60 ], [ %71, %104 ] + %.pn38 = phi { ptr, i32 } [ %95, %96 ], [ %71, %_ZNSt4pairISt6vectorIS_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EEfED2Ev.exit60 ], [ %71, %106 ] call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %9) #29 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %8) #29 br label %220 @@ -8477,18 +8477,18 @@ _ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EED2Ev.ex call void @llvm.lifetime.start.p0(i64 152, ptr nonnull %10) #29 call void @_ZN13sentencepiece7unigram7LatticeC1Ev(ptr noundef nonnull align 8 dereferenceable(152) %10) invoke void @_ZN13sentencepiece7unigram7Lattice11SetSentenceESt17basic_string_viewIcSt11char_traitsIcEE(ptr noundef nonnull align 8 dereferenceable(152) %10, i64 %2, ptr %3) - to label %110 unwind label %136 + to label %112 unwind label %136 110: ; preds = %109 invoke void @_ZNK13sentencepiece7unigram5Model13PopulateNodesEPNS0_7LatticeE(ptr noundef nonnull align 8 dereferenceable(176) %1, ptr noundef nonnull %10) - to label %111 unwind label %136 + to label %113 unwind label %136 111: ; preds = %110 call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(24) %0, i8 0, i64 24, i1 false) call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %11) #29 %112 = zext nneg i32 %.sroa.speculated to i64 invoke void @_ZN13sentencepiece7unigram7Lattice5NBestEmbf(ptr dead_on_unwind nonnull writable sret(%"class.std::vector.16") align 8 %11, ptr noundef nonnull align 8 dereferenceable(152) %10, i64 noundef %112, i1 noundef zeroext false, float noundef 0.000000e+00) - to label %113 unwind label %138 + to label %115 unwind label %138 113: ; preds = %111 %114 = load ptr, ptr %11, align 8, !tbaa !124 @@ -8535,7 +8535,7 @@ _ZSt8_DestroyIPSt4pairISt6vectorIPN13sentencepiece7unigram7Lattice4NodeESaIS6_EE br label %_ZSt8_DestroyIPSt4pairISt6vectorIPN13sentencepiece7unigram7Lattice4NodeESaIS6_EEfES9_EvT_SB_RSaIT0_E.exit.i _ZSt8_DestroyIPSt4pairISt6vectorIPN13sentencepiece7unigram7Lattice4NodeESaIS6_EEfES9_EvT_SB_RSaIT0_E.exit.i: ; preds = %113, %_ZSt8_DestroyIPSt4pairISt6vectorIPN13sentencepiece7unigram7Lattice4NodeESaIS6_EEfES9_EvT_SB_RSaIT0_E.exitthread-pre-split.i, %._crit_edge123 - %129 = phi ptr [ %.pr.i, %_ZSt8_DestroyIPSt4pairISt6vectorIPN13sentencepiece7unigram7Lattice4NodeESaIS6_EEfES9_EvT_SB_RSaIT0_E.exitthread-pre-split.i ], [ %.pre125, %._crit_edge123 ], [ %114, %113 ] + %129 = phi ptr [ %.pr.i, %_ZSt8_DestroyIPSt4pairISt6vectorIPN13sentencepiece7unigram7Lattice4NodeESaIS6_EEfES9_EvT_SB_RSaIT0_E.exitthread-pre-split.i ], [ %.pre125, %._crit_edge123 ], [ %114, %115 ] %.not.i.i.i64 = icmp eq ptr %129, null br i1 %.not.i.i.i64, label %_ZNSt6vectorISt4pairIS_IPN13sentencepiece7unigram7Lattice4NodeESaIS5_EEfESaIS8_EED2Ev.exit, label %130 @@ -8575,7 +8575,7 @@ _ZNSt6vectorISt4pairIS_IPN13sentencepiece7unigram7Lattice4NodeESaIS5_EEfESaIS8_E br i1 %.not110117, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE12emplace_backIJRKS4_RKiEEERS5_DpOT_.exit, %140 - %144 = phi ptr [ null, %140 ], [ %201, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE12emplace_backIJRKS4_RKiEEERS5_DpOT_.exit ] + %144 = phi ptr [ null, %142 ], [ %201, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE12emplace_backIJRKS4_RKiEEERS5_DpOT_.exit ] %145 = getelementptr inbounds nuw i8, ptr %.sroa.082.0120, i64 24 %146 = load ptr, ptr %119, align 8, !tbaa !305 %147 = load ptr, ptr %120, align 8, !tbaa !304 @@ -8608,7 +8608,7 @@ _ZNSt16allocator_traitsISaISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiEE to label %.noexc67 unwind label %.loopexit111 .noexc67: ; preds = %_ZNSt16allocator_traitsISaISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiEEE8allocateERS6_m.exit.i.i.i.i.i.i.i.i, %148 - %157 = phi ptr [ null, %148 ], [ %156, %_ZNSt16allocator_traitsISaISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiEEE8allocateERS6_m.exit.i.i.i.i.i.i.i.i ] + %157 = phi ptr [ null, %150 ], [ %156, %_ZNSt16allocator_traitsISaISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiEEE8allocateERS6_m.exit.i.i.i.i.i.i.i.i ] store ptr %157, ptr %146, align 8, !tbaa !245 %158 = getelementptr inbounds nuw i8, ptr %146, i64 8 store ptr %157, ptr %158, align 8, !tbaa !243 @@ -8649,8 +8649,8 @@ _ZNSt16allocator_traitsISaISt4pairISt6vectorIS0_ISt17basic_string_viewIcSt11char br label %_ZNSt6vectorISt4pairIS_IS0_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EEfESaIS8_EE12emplace_backIJRS7_RKfEEERS8_DpOT_.exit .lr.ph: ; preds = %140, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE12emplace_backIJRKS4_RKiEEERS5_DpOT_.exit - %170 = phi ptr [ %201, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE12emplace_backIJRKS4_RKiEEERS5_DpOT_.exit ], [ null, %140 ] - %.sroa.078.0118 = phi ptr [ %202, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE12emplace_backIJRKS4_RKiEEERS5_DpOT_.exit ], [ %141, %140 ] + %170 = phi ptr [ %201, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE12emplace_backIJRKS4_RKiEEERS5_DpOT_.exit ], [ null, %142 ] + %.sroa.078.0118 = phi ptr [ %202, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE12emplace_backIJRKS4_RKiEEERS5_DpOT_.exit ], [ %141, %142 ] %171 = load ptr, ptr %.sroa.078.0118, align 8, !tbaa !27 %172 = getelementptr inbounds nuw i8, ptr %171, i64 28 %173 = load ptr, ptr %118, align 8, !tbaa !283 @@ -8731,7 +8731,7 @@ _ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE17_M_re br label %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE12emplace_backIJRKS4_RKiEEERS5_DpOT_.exit _ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE12emplace_backIJRKS4_RKiEEERS5_DpOT_.exit: ; preds = %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE17_M_realloc_insertIJRKS4_RKiEEEvN9__gnu_cxx17__normal_iteratorIPS5_S7_EEDpOT_.exit.i, %174 - %201 = phi ptr [ %198, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE17_M_realloc_insertIJRKS4_RKiEEEvN9__gnu_cxx17__normal_iteratorIPS5_S7_EEDpOT_.exit.i ], [ %178, %174 ] + %201 = phi ptr [ %198, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EE17_M_realloc_insertIJRKS4_RKiEEEvN9__gnu_cxx17__normal_iteratorIPS5_S7_EEDpOT_.exit.i ], [ %178, %176 ] %202 = getelementptr inbounds nuw i8, ptr %.sroa.078.0118, i64 8 %.not110 = icmp eq ptr %202, %143 br i1 %.not110, label %._crit_edge, label %.lr.ph @@ -8796,12 +8796,12 @@ _ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EED2Ev.ex br label %217 217: ; preds = %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EED2Ev.exit77, %138 - %.pn.pn = phi { ptr, i32 } [ %.pn, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EED2Ev.exit77 ], [ %139, %138 ] + %.pn.pn = phi { ptr, i32 } [ %.pn, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EED2Ev.exit77 ], [ %139, %140 ] call void @_ZNSt6vectorISt4pairIS_IS0_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EEfESaIS8_EED2Ev(ptr noundef nonnull align 8 dereferenceable(24) %0) #29 br label %218 218: ; preds = %217, %136 - %.pn.pn.pn = phi { ptr, i32 } [ %.pn.pn, %217 ], [ %137, %136 ] + %.pn.pn.pn = phi { ptr, i32 } [ %.pn.pn, %219 ], [ %137, %138 ] call void @_ZN13sentencepiece7unigram7LatticeD1Ev(ptr noundef nonnull align 8 dereferenceable(152) %10) #29 call void @llvm.lifetime.end.p0(i64 152, ptr nonnull %10) #29 br label %220 @@ -8810,7 +8810,7 @@ _ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EED2Ev.ex ret void 220: ; preds = %218, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EED2Ev.exit62, %_ZNSt4pairISt6vectorIS_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EEfED2Ev.exit45 - %.pn40.pn = phi { ptr, i32 } [ %26, %_ZNSt4pairISt6vectorIS_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EEfED2Ev.exit45 ], [ %.pn38, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EED2Ev.exit62 ], [ %.pn.pn.pn, %218 ] + %.pn40.pn = phi { ptr, i32 } [ %26, %_ZNSt4pairISt6vectorIS_ISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EEfED2Ev.exit45 ], [ %.pn38, %_ZNSt6vectorISt4pairISt17basic_string_viewIcSt11char_traitsIcEEiESaIS5_EED2Ev.exit62 ], [ %.pn.pn.pn, %220 ] resume { ptr, i32 } %.pn40.pn } @@ -19614,6 +19614,9 @@ declare i64 @llvm.smin.i64(i64, i64) #25 ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: read) declare i32 @bcmp(ptr captures(none), ptr captures(none), i64) local_unnamed_addr #27 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #25 + attributes #0 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { nofree nounwind } diff --git a/bench/sqlite/optimized/sqlite3.ll b/bench/sqlite/optimized/sqlite3.ll index 744c9d298b6..ca4eb932abf 100644 --- a/bench/sqlite/optimized/sqlite3.ll +++ b/bench/sqlite/optimized/sqlite3.ll @@ -317377,8 +317377,8 @@ define internal void @roundFunc(ptr noundef captures(none) %0, i32 noundef %1, p sqlite3_value_int64.exit: ; preds = %18, %22, %25, %27, %29, %31, %34 %.0.i.i = phi i64 [ %19, %18 ], [ %35, %34 ], [ 0, %31 ], [ 0, %29 ], [ %28, %27 ], [ -9223372036854775808, %22 ], [ 9223372036854775807, %25 ] - %spec.store.select = tail call i64 @llvm.smin.i64(i64 %.0.i.i, i64 30) - %spec.store.select2 = tail call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0) + %spec.store.select = tail call i64 @llvm.smax.i64(i64 %.0.i.i, i64 0) + %spec.store.select2 = tail call i64 @llvm.umin.i64(i64 %spec.store.select, i64 30) br label %36 36: ; preds = %sqlite3_value_int64.exit, %3 @@ -317423,7 +317423,7 @@ sqlite3_value_int64.exit: ; preds = %18, %22, %25, %27, br label %sqlite3_value_double.exit sqlite3_value_double.exit: ; preds = %47, %51, %54, %56 - %.0.i.i19 = phi double [ %48, %47 ], [ %53, %51 ], [ %57, %56 ], [ 0.000000e+00, %54 ] + %.0.i.i19 = phi double [ %48, %49 ], [ %53, %53 ], [ %57, %58 ], [ 0.000000e+00, %56 ] %58 = tail call double @llvm.fabs.f64(double %.0.i.i19) %or.cond = fcmp ogt double %58, 0x4330000000000000 br i1 %or.cond, label %sqlite3_free.exit, label %59 @@ -317464,7 +317464,7 @@ sqlite3_value_double.exit: ; preds = %47, %51, %54, %56 br label %sqlite3VdbeMemSetNull.exit.i sqlite3VdbeMemSetNull.exit.i: ; preds = %77, %76 - %78 = phi ptr [ %.pre.i, %76 ], [ %72, %77 ] + %78 = phi ptr [ %.pre.i, %78 ], [ %72, %79 ] %79 = getelementptr inbounds nuw i8, ptr %0, i64 36 store i32 7, ptr %79, align 4, !tbaa !636 %80 = getelementptr inbounds nuw i8, ptr %78, i64 24 @@ -317515,7 +317515,7 @@ sqlite3VdbeMemSetNull.exit.i: ; preds = %77, %76 br i1 %.not1619.i.i, label %sqlite3_result_error_nomem.exit, label %.lr.ph.i.i .lr.ph.i.i: ; preds = %102, %.lr.ph.i.i - %.020.i.i = phi ptr [ %.0.i.i22, %.lr.ph.i.i ], [ %.018.i.i, %102 ] + %.020.i.i = phi ptr [ %.0.i.i22, %.lr.ph.i.i ], [ %.018.i.i, %104 ] %105 = getelementptr inbounds nuw i8, ptr %.020.i.i, i64 52 %106 = load i32, ptr %105, align 4, !tbaa !253 %107 = add nsw i32 %106, 1 @@ -317543,12 +317543,12 @@ sqlite3VdbeMemSetNull.exit.i: ; preds = %77, %76 117: ; preds = %115 %118 = load ptr, ptr getelementptr inbounds nuw (i8, ptr @sqlite3Config, i64 128), align 8, !tbaa !3 - tail call void %118(ptr noundef nonnull %116) #72 + tail call void %120(ptr noundef nonnull %116) #72 br label %sqlite3_mutex_enter.exit.i sqlite3_mutex_enter.exit.i: ; preds = %117, %115 %119 = load ptr, ptr getelementptr inbounds nuw (i8, ptr @sqlite3Config, i64 56), align 8, !tbaa !85 - %120 = tail call i32 %119(ptr noundef nonnull %69) #72 + %120 = tail call i32 %121(ptr noundef nonnull %69) #72 %121 = sext i32 %120 to i64 %122 = load i64, ptr @sqlite3Stat, align 8, !tbaa !14 %123 = sub nsw i64 %122, %121 @@ -317557,19 +317557,19 @@ sqlite3_mutex_enter.exit.i: ; preds = %117, %115 %125 = add nsw i64 %124, -1 store i64 %125, ptr getelementptr inbounds nuw (i8, ptr @sqlite3Stat, i64 72), align 8, !tbaa !14 %126 = load ptr, ptr getelementptr inbounds nuw (i8, ptr @sqlite3Config, i64 40), align 8, !tbaa !163 - tail call void %126(ptr noundef nonnull %69) #72 + tail call void %128(ptr noundef nonnull %69) #72 %127 = load ptr, ptr @mem0, align 8, !tbaa !161 %.not.i4.i = icmp eq ptr %127, null br i1 %.not.i4.i, label %sqlite3_free.exitthread-pre-split, label %128 128: ; preds = %sqlite3_mutex_enter.exit.i %129 = load ptr, ptr getelementptr inbounds nuw (i8, ptr @sqlite3Config, i64 144), align 8, !tbaa !15 - tail call void %129(ptr noundef nonnull %127) #72 + tail call void %131(ptr noundef nonnull %127) #72 br label %sqlite3_free.exitthread-pre-split 130: ; preds = %109 %131 = load ptr, ptr getelementptr inbounds nuw (i8, ptr @sqlite3Config, i64 40), align 8, !tbaa !163 - tail call void %131(ptr noundef nonnull %69) #72 + tail call void %133(ptr noundef nonnull %69) #72 br label %sqlite3_free.exitthread-pre-split sqlite3_free.exitthread-pre-split: ; preds = %sqlite3_mutex_enter.exit.i, %128, %130 @@ -317577,7 +317577,7 @@ sqlite3_free.exitthread-pre-split: ; preds = %sqlite3_mutex_enter br label %sqlite3_free.exit sqlite3_free.exit: ; preds = %sqlite3_free.exitthread-pre-split, %61, %sqlite3_value_double.exit - %132 = phi double [ %.pr, %sqlite3_free.exitthread-pre-split ], [ %66, %61 ], [ %.0.i.i19, %sqlite3_value_double.exit ] + %132 = phi double [ %.pr, %sqlite3_free.exitthread-pre-split ], [ %66, %63 ], [ %.0.i.i19, %sqlite3_value_double.exit ] %133 = load ptr, ptr %0, align 8, !tbaa !634 %134 = getelementptr inbounds nuw i8, ptr %133, i64 20 %135 = load i16, ptr %134, align 4, !tbaa !147 diff --git a/bench/stb/optimized/stb_image_write.ll b/bench/stb/optimized/stb_image_write.ll index 0a7d1a10f3d..3e4ddc641ca 100644 --- a/bench/stb/optimized/stb_image_write.ll +++ b/bench/stb/optimized/stb_image_write.ll @@ -5509,8 +5509,8 @@ define range(i32 0, 2) i32 @stbi_write_jpg_core(ptr noundef readonly captures(no %.not = icmp eq i32 %5, 0 %46 = select i1 %.not, i32 90, i32 %5 %47 = icmp slt i32 %46, 91 - %48 = tail call i32 @llvm.smin.i32(i32 %46, i32 100) - %49 = tail call i32 @llvm.smax.i32(i32 %48, i32 1) + %48 = tail call i32 @llvm.smax.i32(i32 %46, i32 1) + %49 = tail call i32 @llvm.umin.i32(i32 %48, i32 100) %50 = icmp slt i32 %46, 50 br i1 %50, label %51, label %53 @@ -5536,8 +5536,8 @@ define range(i32 0, 2) i32 @stbi_write_jpg_core(ptr noundef readonly captures(no %61 = mul nsw i32 %60, %57 %62 = add nsw i32 %61, 50 %63 = sdiv i32 %62, 100 - %64 = tail call i32 @llvm.smin.i32(i32 %63, i32 255) - %65 = tail call i32 @llvm.smax.i32(i32 %64, i32 1) + %64 = tail call i32 @llvm.smax.i32(i32 %63, i32 1) + %65 = tail call i32 @llvm.umin.i32(i32 %64, i32 255) %66 = trunc nuw i32 %65 to i8 %67 = getelementptr inbounds nuw [64 x i8], ptr @stbiw__jpg_ZigZag, i64 0, i64 %indvars.iv %68 = load i8, ptr %67, align 1, !tbaa !11 @@ -5549,8 +5549,8 @@ define range(i32 0, 2) i32 @stbi_write_jpg_core(ptr noundef readonly captures(no %73 = mul nsw i32 %72, %57 %74 = add nsw i32 %73, 50 %75 = sdiv i32 %74, 100 - %76 = tail call i32 @llvm.smin.i32(i32 %75, i32 255) - %77 = tail call i32 @llvm.smax.i32(i32 %76, i32 1) + %76 = tail call i32 @llvm.smax.i32(i32 %75, i32 1) + %77 = tail call i32 @llvm.umin.i32(i32 %76, i32 255) %78 = trunc nuw i32 %77 to i8 %79 = getelementptr inbounds nuw [64 x i8], ptr %26, i64 0, i64 %69 store i8 %78, ptr %79, align 1, !tbaa !11 @@ -6205,6 +6205,9 @@ declare i32 @llvm.ctlz.i32(i32, i1 immarg) #24 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i32 @llvm.umax.i32(i32, i32) #24 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.umin.i32(i32, i32) #24 + attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { nofree nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } diff --git a/bench/stockfish/optimized/evaluate_nnue.ll b/bench/stockfish/optimized/evaluate_nnue.ll index d7022bd71ce..d24aca3142f 100644 --- a/bench/stockfish/optimized/evaluate_nnue.ll +++ b/bench/stockfish/optimized/evaluate_nnue.ll @@ -799,21 +799,21 @@ _ZNK9Stockfish4Eval4NNUE18FeatureTransformerILj2560EXadL_ZNS_9StateInfo14accumul %158 = shl nuw nsw i64 %indvars.iv, 1 %159 = getelementptr inbounds nuw <2 x i64>, ptr %154, i64 %158 %160 = load <8 x i16>, ptr %159, align 16 - %161 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %160, <8 x i16> splat (i16 127)) - %162 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %161, <8 x i16> zeroinitializer) + %161 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %160, <8 x i16> zeroinitializer) + %162 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %161, <8 x i16> splat (i16 127)) %163 = or disjoint i64 %158, 1 %164 = getelementptr inbounds nuw <2 x i64>, ptr %154, i64 %163 %165 = load <8 x i16>, ptr %164, align 16 - %166 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %165, <8 x i16> splat (i16 127)) - %167 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %166, <8 x i16> zeroinitializer) + %166 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %165, <8 x i16> zeroinitializer) + %167 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %166, <8 x i16> splat (i16 127)) %168 = getelementptr inbounds nuw <2 x i64>, ptr %155, i64 %158 %169 = load <8 x i16>, ptr %168, align 16 - %170 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %169, <8 x i16> splat (i16 127)) - %171 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %170, <8 x i16> zeroinitializer) + %170 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %169, <8 x i16> zeroinitializer) + %171 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %170, <8 x i16> splat (i16 127)) %172 = getelementptr inbounds nuw <2 x i64>, ptr %155, i64 %163 %173 = load <8 x i16>, ptr %172, align 16 - %174 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %173, <8 x i16> splat (i16 127)) - %175 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %174, <8 x i16> zeroinitializer) + %174 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %173, <8 x i16> zeroinitializer) + %175 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %174, <8 x i16> splat (i16 127)) %176 = mul nuw nsw <8 x i16> %171, %162 %177 = mul nuw nsw <8 x i16> %175, %167 %178 = lshr <8 x i16> %176, splat (i16 7) @@ -1423,21 +1423,21 @@ _ZNK9Stockfish4Eval4NNUE18FeatureTransformerILj128EXadL_ZNS_9StateInfo16accumula %154 = shl nuw nsw i64 %indvars.iv, 1 %155 = getelementptr inbounds nuw <2 x i64>, ptr %150, i64 %154 %156 = load <8 x i16>, ptr %155, align 16 - %157 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %156, <8 x i16> splat (i16 127)) - %158 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %157, <8 x i16> zeroinitializer) + %157 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %156, <8 x i16> zeroinitializer) + %158 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %157, <8 x i16> splat (i16 127)) %159 = or disjoint i64 %154, 1 %160 = getelementptr inbounds nuw <2 x i64>, ptr %150, i64 %159 %161 = load <8 x i16>, ptr %160, align 16 - %162 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %161, <8 x i16> splat (i16 127)) - %163 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %162, <8 x i16> zeroinitializer) + %162 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %161, <8 x i16> zeroinitializer) + %163 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %162, <8 x i16> splat (i16 127)) %164 = getelementptr inbounds nuw <2 x i64>, ptr %151, i64 %154 %165 = load <8 x i16>, ptr %164, align 16 - %166 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %165, <8 x i16> splat (i16 127)) - %167 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %166, <8 x i16> zeroinitializer) + %166 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %165, <8 x i16> zeroinitializer) + %167 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %166, <8 x i16> splat (i16 127)) %168 = getelementptr inbounds nuw <2 x i64>, ptr %151, i64 %159 %169 = load <8 x i16>, ptr %168, align 16 - %170 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %169, <8 x i16> splat (i16 127)) - %171 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %170, <8 x i16> zeroinitializer) + %170 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %169, <8 x i16> zeroinitializer) + %171 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %170, <8 x i16> splat (i16 127)) %172 = mul nuw nsw <8 x i16> %167, %158 %173 = mul nuw nsw <8 x i16> %171, %163 %174 = lshr <8 x i16> %172, splat (i16 7) @@ -8619,9 +8619,6 @@ define linkonce_odr dso_local void @_ZNK9Stockfish4Eval4NNUE18FeatureTransformer ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>) #4 -; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>) #4 - ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none) declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) #9 @@ -9608,6 +9605,9 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none)) #11 ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) declare void @llvm.memmove.p0.p0.i64(ptr writeonly captures(none), ptr readonly captures(none), i64, i1 immarg) #12 +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>) #13 + ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) declare void @llvm.experimental.noalias.scope.decl(metadata) #13 @@ -9627,8 +9627,8 @@ attributes #9 = { mustprogress nocallback nofree nosync nounwind willreturn memo attributes #10 = { noreturn "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #11 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } attributes #12 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } -attributes #13 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } -attributes #14 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #13 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #14 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } attributes #15 = { nounwind } attributes #16 = { noreturn nounwind } diff --git a/bench/typst-rs/optimized/4qskctz4kwc33g7b.ll b/bench/typst-rs/optimized/4qskctz4kwc33g7b.ll index 4d8f1fb7d1e..cd21e3901e9 100644 --- a/bench/typst-rs/optimized/4qskctz4kwc33g7b.ll +++ b/bench/typst-rs/optimized/4qskctz4kwc33g7b.ll @@ -72477,8 +72477,8 @@ define hidden range(i48 100, 12884901888) i48 @_ZN5typst4text7variant17hf94498aa call void @llvm.lifetime.end.p0(i64 96, ptr nonnull %5), !noalias !15830 call void @llvm.lifetime.end.p0(i64 88, ptr nonnull %6), !noalias !15817 call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %2), !noalias !15814 - %.0.in.sroa.speculate.load.4.sroa.speculated.i = call i64 @llvm.smin.i64(i64 %56, i64 32767) - %spec.select = call i64 @llvm.smax.i64(i64 %.0.in.sroa.speculate.load.4.sroa.speculated.i, i64 -32768) + %.0.in.sroa.speculate.load.4.sroa.speculated.i = call i64 @llvm.smax.i64(i64 %56, i64 -32768) + %spec.select = call i64 @llvm.smin.i64(i64 %.0.in.sroa.speculate.load.4.sroa.speculated.i, i64 32767) %57 = trunc nsw i64 %spec.select to i16 br label %58 diff --git a/bench/typst-rs/optimized/d6l9ieo9tcw33dn.ll b/bench/typst-rs/optimized/d6l9ieo9tcw33dn.ll index e421a3f6ae2..c5a5264b077 100644 --- a/bench/typst-rs/optimized/d6l9ieo9tcw33dn.ll +++ b/bench/typst-rs/optimized/d6l9ieo9tcw33dn.ll @@ -175714,8 +175714,8 @@ define void @"_ZN94_$LT$typst..text..font..variant..FontWeight$u20$as$u20$typst. br label %31 34: ; preds = %26 - %.0.in.sroa.speculate.load.4.sroa.speculated.i = tail call i64 @llvm.smin.i64(i64 %.sroa.041.sroa.0.0.copyload, i64 65535) - %.0.in.sroa.speculated.i = tail call i64 @llvm.smax.i64(i64 %.0.in.sroa.speculate.load.4.sroa.speculated.i, i64 0) + %.0.in.sroa.speculate.load.4.sroa.speculated.i = tail call i64 @llvm.smax.i64(i64 %.sroa.041.sroa.0.0.copyload, i64 0) + %.0.in.sroa.speculated.i = tail call i64 @llvm.umin.i64(i64 %.0.in.sroa.speculate.load.4.sroa.speculated.i, i64 65535) %35 = trunc nuw i64 %.0.in.sroa.speculated.i to i16 %.0.sroa.speculated.i.i = tail call noundef i16 @llvm.umax.i16(i16 %35, i16 100) %.0.sroa.speculated.i1.i = tail call noundef range(i16 100, 901) i16 @llvm.umin.i16(i16 %.0.sroa.speculated.i.i, i16 900) @@ -175739,7 +175739,7 @@ define void @"_ZN94_$LT$typst..text..font..variant..FontWeight$u20$as$u20$typst. %40 = landingpad { ptr, i32 } cleanup invoke void @"_ZN4core3ptr53drop_in_place$LT$typst..foundations..value..Value$GT$17h54cec887ca59b7d7E.llvm.7889846851399105414"(ptr noalias noundef nonnull align 8 dereferenceable(32) %1) #52 - to label %38 unwind label %41 + to label %40 unwind label %41 41: ; preds = %39 %42 = landingpad { ptr, i32 } @@ -200345,9 +200345,6 @@ declare i16 @llvm.abs.i16(i16, i1 immarg) #47 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare range(i8 -1, 2) i8 @llvm.scmp.i8.i64(i64, i64) #47 -; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i64 @llvm.smin.i64(i64, i64) #47 - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.smax.i64(i64, i64) #47 diff --git a/bench/xgboost/optimized/extmem_quantile_dmatrix.ll b/bench/xgboost/optimized/extmem_quantile_dmatrix.ll index 0b657713e45..0d25b158687 100644 --- a/bench/xgboost/optimized/extmem_quantile_dmatrix.ll +++ b/bench/xgboost/optimized/extmem_quantile_dmatrix.ll @@ -16072,10 +16072,10 @@ _ZN7xgboost4data25DefaultFormatStreamPolicyINS_16GHistIndexMatrixENS0_22GHistInd to label %11 unwind label %56 11: ; preds = %_ZN7xgboost4data25DefaultFormatStreamPolicyINS_16GHistIndexMatrixENS0_22GHistIndexFormatPolicyEEC2Ev.exit - %.sroa.speculated24 = call i32 @llvm.smin.i32(i32 %2, i32 16) - %.sroa.speculated = call i32 @llvm.smax.i32(i32 %.sroa.speculated24, i32 2) + %.sroa.speculated24 = call i32 @llvm.smax.i32(i32 %2, i32 2) + %.sroa.speculated = call i32 @llvm.umin.i32(i32 %.sroa.speculated24, i32 16) invoke void @_ZN7xgboost6common10ThreadPoolC2INS_13InitNewThreadEEENS_10StringViewEiOT_(ptr noundef nonnull align 8 dereferenceable(193) %10, ptr nonnull @.str.78, i64 7, i32 noundef %.sroa.speculated, ptr noundef nonnull align 4 dereferenceable(20) %6) - to label %12 unwind label %56 + to label %14 unwind label %56 12: ; preds = %11 call void @llvm.lifetime.end.p0(i64 20, ptr nonnull %6) #15 @@ -16220,7 +16220,7 @@ _ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i16 br label %_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit18 _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.exit18: ; preds = %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i16, %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.thread.i.i17, %60 - %.pn = phi { ptr, i32 } [ %61, %60 ], [ %63, %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.thread.i.i17 ], [ %63, %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i16 ] + %.pn = phi { ptr, i32 } [ %61, %62 ], [ %63, %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.thread.i.i17 ], [ %63, %_ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_is_localEv.exit.i.i16 ] call void @_ZN7xgboost6common7MonitorD2Ev(ptr noundef nonnull align 8 dereferenceable(96) %33) #15 %70 = load ptr, ptr %32, align 8, !tbaa !521 %.not.i.i = icmp eq ptr %70, null @@ -16244,13 +16244,13 @@ _ZNSt10unique_ptrISt6vectorISt6futureISt10shared_ptrIN7xgboost16GHistIndexMatrix br label %74 74: ; preds = %_ZNSt10unique_ptrISt6vectorISt6futureISt10shared_ptrIN7xgboost16GHistIndexMatrixEEESaIS6_EESt14default_deleteIS8_EED2Ev.exit, %58 - %.pn.pn.pn = phi { ptr, i32 } [ %.pn, %_ZNSt10unique_ptrISt6vectorISt6futureISt10shared_ptrIN7xgboost16GHistIndexMatrixEEESaIS6_EESt14default_deleteIS8_EED2Ev.exit ], [ %59, %58 ] + %.pn.pn.pn = phi { ptr, i32 } [ %.pn, %_ZNSt10unique_ptrISt6vectorISt6futureISt10shared_ptrIN7xgboost16GHistIndexMatrixEEESaIS6_EESt14default_deleteIS8_EED2Ev.exit ], [ %59, %60 ] call void @_ZNSt12__shared_ptrIN7xgboost4data5CacheELN9__gnu_cxx12_Lock_policyE2EED2Ev(ptr noundef nonnull align 8 dereferenceable(16) %24) #15 call void @_ZN7xgboost6common10ThreadPoolD2Ev(ptr noundef nonnull align 8 dereferenceable(193) %10) #15 br label %75 75: ; preds = %74, %56 - %.pn.pn.pn.pn = phi { ptr, i32 } [ %.pn.pn.pn, %74 ], [ %57, %56 ] + %.pn.pn.pn.pn = phi { ptr, i32 } [ %.pn.pn.pn, %76 ], [ %57, %58 ] %76 = getelementptr inbounds nuw i8, ptr %0, i64 80 call void @_ZNSt12__shared_ptrIN7xgboost16GHistIndexMatrixELN9__gnu_cxx12_Lock_policyE2EED2Ev(ptr noundef nonnull align 8 dereferenceable(16) %76) #15 call void @_ZN7xgboost4data22GHistIndexFormatPolicyINS_16GHistIndexMatrixEED2Ev(ptr noundef nonnull align 8 dereferenceable(32) %8) #15 @@ -20556,10 +20556,10 @@ declare i64 @llvm.smin.i64(i64, i64) #28 declare i64 @llvm.umax.i64(i64, i64) #28 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smin.i32(i32, i32) #28 +declare i32 @llvm.smax.i32(i32, i32) #28 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) -declare i32 @llvm.smax.i32(i32, i32) #28 +declare i32 @llvm.umin.i32(i32, i32) #28 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare i64 @llvm.usub.sat.i64(i64, i64) #28 diff --git a/scripts/setup_pre_commit_patch.sh b/scripts/setup_pre_commit_patch.sh index c5409e09ef4..37def5930e0 100755 --- a/scripts/setup_pre_commit_patch.sh +++ b/scripts/setup_pre_commit_patch.sh @@ -2,7 +2,7 @@ set -euo pipefail shopt -s inherit_errexit -export GITHUB_PATCH_ID="/llvm-project/commit/" +export GITHUB_PATCH_ID=llvm/llvm-project/pull/136665 export COMPTIME_MODE=0 # Please rebase manually