diff --git a/bench/abc/optimized/extraUtilMaj.ll b/bench/abc/optimized/extraUtilMaj.ll index cc1f4880b7b..f58652e46d3 100644 --- a/bench/abc/optimized/extraUtilMaj.ll +++ b/bench/abc/optimized/extraUtilMaj.ll @@ -1075,7 +1075,7 @@ Abc_TtCopy.exit: ; preds = %.lr.ph.i39, %Gem_Gr br i1 %70, label %.lr.ph.i43, label %Abc_TtSwapAdjacent.exit, !llvm.loop !67 71: ; preds = %62 - %72 = icmp samesign ult i32 %indvars119, 7 + %72 = icmp samesign ult i64 %indvars.iv, 7 %73 = trunc i64 %indvars.iv to i32 %74 = add i32 %73, -6 %75 = shl nuw i32 1, %74 diff --git a/bench/abc/optimized/mpmTruth.ll b/bench/abc/optimized/mpmTruth.ll index 8a26ee68f68..7ef7cf81bc3 100644 --- a/bench/abc/optimized/mpmTruth.ll +++ b/bench/abc/optimized/mpmTruth.ll @@ -107,12 +107,12 @@ define range(i32 0, 2) i32 @Mpm_CutComputeTruth(ptr noundef %0, ptr noundef capt br i1 %82, label %89, label %83 83: ; preds = %76 - %84 = trunc nsw i64 %indvars.iv.next.i.i to i32 - %85 = icmp samesign ult i32 %.017.i.i, %84 - br i1 %85, label %86, label %87 + %84 = icmp samesign ugt i64 %indvars.iv.next.i.i, %77 + br i1 %84, label %85, label %87 86: ; preds = %83 - call fastcc void @Abc_TtSwapVars(ptr noundef nonnull %11, i32 noundef %16, i32 noundef %.017.i.i, i32 noundef %84) + %86 = trunc nsw i64 %indvars.iv.next.i.i to i32 + call fastcc void @Abc_TtSwapVars(ptr noundef nonnull %11, i32 noundef %16, i32 noundef %.017.i.i, i32 noundef %86) br label %87 87: ; preds = %86, %83 @@ -153,12 +153,12 @@ Mpm_TruthStretch.exit.i: ; preds = %89, %50 br i1 %107, label %114, label %108 108: ; preds = %101 - %109 = trunc nsw i64 %indvars.iv.next.i53.i to i32 - %110 = icmp samesign ult i32 %.017.i52.i, %109 - br i1 %110, label %111, label %112 + %109 = icmp samesign ugt i64 %indvars.iv.next.i53.i, %102 + br i1 %109, label %110, label %112 111: ; preds = %108 - call fastcc void @Abc_TtSwapVars(ptr noundef nonnull %12, i32 noundef %16, i32 noundef %.017.i52.i, i32 noundef %109) + %111 = trunc nsw i64 %indvars.iv.next.i53.i to i32 + call fastcc void @Abc_TtSwapVars(ptr noundef nonnull %12, i32 noundef %16, i32 noundef %.017.i52.i, i32 noundef %111) br label %112 112: ; preds = %111, %108 @@ -224,12 +224,12 @@ Mpm_TruthStretch.exit55.i: ; preds = %114, %Mpm_TruthStre br i1 %152, label %159, label %153 153: ; preds = %146 - %154 = trunc nsw i64 %indvars.iv.next.i59.i to i32 - %155 = icmp samesign ult i32 %.017.i58.i, %154 - br i1 %155, label %156, label %157 + %154 = icmp samesign ugt i64 %indvars.iv.next.i59.i, %147 + br i1 %154, label %155, label %157 156: ; preds = %153 - call fastcc void @Abc_TtSwapVars(ptr noundef nonnull %13, i32 noundef %16, i32 noundef %.017.i58.i, i32 noundef %154) + %156 = trunc nsw i64 %indvars.iv.next.i59.i to i32 + call fastcc void @Abc_TtSwapVars(ptr noundef nonnull %13, i32 noundef %16, i32 noundef %.017.i58.i, i32 noundef %156) br label %157 157: ; preds = %156, %153 @@ -577,12 +577,12 @@ Abc_TtCopy.exit86.i: ; preds = %.lr.ph.i75.i, %.lr. br i1 %316, label %323, label %317 317: ; preds = %310 - %318 = trunc nsw i64 %indvars.iv.next.i89.i to i32 - %319 = icmp samesign ult i32 %.017.i.i36, %318 - br i1 %319, label %320, label %321 + %318 = icmp samesign ugt i64 %indvars.iv.next.i89.i, %311 + br i1 %318, label %319, label %321 320: ; preds = %317 - tail call fastcc void @Abc_TtSwapVars(ptr noundef nonnull %270, i32 noundef %16, i32 noundef %.017.i.i36, i32 noundef %318) + %320 = trunc nsw i64 %indvars.iv.next.i89.i to i32 + tail call fastcc void @Abc_TtSwapVars(ptr noundef nonnull %270, i32 noundef %16, i32 noundef %.017.i.i36, i32 noundef %320) br label %321 321: ; preds = %320, %317 @@ -632,12 +632,12 @@ Mpm_TruthStretch.exit.i21: ; preds = %Mpm_TruthStretch.ex br i1 %345, label %352, label %346 346: ; preds = %339 - %347 = trunc nsw i64 %indvars.iv.next.i93.i to i32 - %348 = icmp samesign ult i32 %.017.i92.i, %347 - br i1 %348, label %349, label %350 + %347 = icmp samesign ugt i64 %indvars.iv.next.i93.i, %340 + br i1 %347, label %348, label %350 349: ; preds = %346 - tail call fastcc void @Abc_TtSwapVars(ptr noundef nonnull %285, i32 noundef %328, i32 noundef %.017.i92.i, i32 noundef %347) + %349 = trunc nsw i64 %indvars.iv.next.i93.i to i32 + tail call fastcc void @Abc_TtSwapVars(ptr noundef nonnull %285, i32 noundef %328, i32 noundef %.017.i92.i, i32 noundef %349) br label %350 350: ; preds = %349, %346 @@ -752,12 +752,12 @@ Abc_TtCopy.exit111.i: ; preds = %.lr.ph.i100.i, %.lr br i1 %408, label %415, label %409 409: ; preds = %402 - %410 = trunc nsw i64 %indvars.iv.next.i115.i to i32 - %411 = icmp samesign ult i32 %.017.i114.i, %410 - br i1 %411, label %412, label %413 + %410 = icmp samesign ugt i64 %indvars.iv.next.i115.i, %403 + br i1 %410, label %411, label %413 412: ; preds = %409 - tail call fastcc void @Abc_TtSwapVars(ptr noundef nonnull %376, i32 noundef %390, i32 noundef %.017.i114.i, i32 noundef %410) + %412 = trunc nsw i64 %indvars.iv.next.i115.i to i32 + tail call fastcc void @Abc_TtSwapVars(ptr noundef nonnull %376, i32 noundef %390, i32 noundef %.017.i114.i, i32 noundef %412) br label %413 413: ; preds = %412, %409 diff --git a/bench/darktable/optimized/DeflateDecompressor.ll b/bench/darktable/optimized/DeflateDecompressor.ll index bd0e42cd2d9..592225cccfa 100644 --- a/bench/darktable/optimized/DeflateDecompressor.ll +++ b/bench/darktable/optimized/DeflateDecompressor.ll @@ -429,14 +429,14 @@ _ZN8rawspeed25extendBinaryFloatingPointINS_13ieee_754_20088Binary16ENS1_8Binary3 %141 = mul i32 %140, %.sroa.0158.0.extract.trunc %142 = add nsw i32 %141, %115 %143 = icmp samesign ult i32 %142, %26 - call void @llvm.assume(i1 %143) - %144 = icmp sgt i32 %142, -1 call void @llvm.assume(i1 %144) - %145 = zext nneg i32 %142 to i64 - %146 = getelementptr inbounds nuw i8, ptr %89, i64 %145 - %147 = load i8, ptr %146, align 1, !tbaa !96 - %148 = getelementptr inbounds nuw [2 x i8], ptr %8, i64 0, i64 %indvars.iv.i52 - store i8 %147, ptr %148, align 1, !tbaa !96 + %145 = icmp sgt i32 %142, -1 + call void @llvm.assume(i1 %145) + %147 = zext nneg i32 %143 to i64 + %148 = getelementptr inbounds nuw i8, ptr %90, i64 %146 + %148 = load i8, ptr %148, align 1, !tbaa !96 + %149 = getelementptr inbounds nuw [2 x i8], ptr %8, i64 0, i64 %indvars.iv.i52 + store i8 %148, ptr %149, align 1, !tbaa !96 %indvars.iv.next.i53 = add nuw nsw i64 %indvars.iv.i52, 1 %.not.i54 = icmp eq i64 %indvars.iv.next.i53, 2 br i1 %.not.i54, label %116, label %139, !llvm.loop !114 @@ -496,8 +496,7 @@ _ZN8rawspeed25extendBinaryFloatingPointINS_13ieee_754_20088Binary24ENS1_8Binary3 %indvars.iv.i62 = phi i64 [ 0, %.lr.ph.i59 ], [ %indvars.iv.next.i63, %172 ] %173 = mul nsw i64 %indvars.iv.i62, %78 %174 = add nsw i64 %173, %indvars.iv21.i61 - %175 = trunc nsw i64 %174 to i32 - %176 = icmp samesign ugt i32 %26, %175 + %175 = icmp samesign ult i64 %174, %75 call void @llvm.assume(i1 %176) %177 = icmp sgt i64 %174, -1 call void @llvm.assume(i1 %177) @@ -528,21 +527,20 @@ _ZN8rawspeed25extendBinaryFloatingPointINS_13ieee_754_20088Binary24ENS1_8Binary3 br i1 %exitcond.not.i87, label %_ZN8rawspeed12_GLOBAL__N_116decodeFPDeltaRowINS_13ieee_754_20088Binary16EEEvNS_10Array1DRefIKhEEiNS_17CroppedArray1DRefIfEE.exit, label %.lr.ph.i78, !llvm.loop !117 186: ; preds = %186, %.lr.ph.i78 - %indvars.iv.i82 = phi i64 [ 0, %.lr.ph.i78 ], [ %indvars.iv.next.i83, %186 ] + %indvars.iv.i82 = phi i64 [ 0, %.lr.ph.i78 ], [ %indvars.iv.next.i82, %186 ] %187 = mul nsw i64 %indvars.iv.i82, %78 %188 = add nsw i64 %187, %indvars.iv21.i81 - %189 = trunc nsw i64 %188 to i32 - %190 = icmp samesign ugt i32 %26, %189 + %189 = icmp samesign ult i64 %188, %75 + call void @llvm.assume(i1 %189) + %190 = icmp sgt i64 %188, -1 call void @llvm.assume(i1 %190) - %191 = icmp sgt i64 %188, -1 - call void @llvm.assume(i1 %191) - %192 = getelementptr inbounds nuw i8, ptr %89, i64 %188 - %193 = load i8, ptr %192, align 1, !tbaa !96 - %194 = getelementptr inbounds nuw [4 x i8], ptr %6, i64 0, i64 %indvars.iv.i82 - store i8 %193, ptr %194, align 1, !tbaa !96 - %indvars.iv.next.i83 = add nuw nsw i64 %indvars.iv.i82, 1 - %.not.i84 = icmp eq i64 %indvars.iv.next.i83, 4 - br i1 %.not.i84, label %181, label %186, !llvm.loop !118 + %191 = getelementptr inbounds nuw i8, ptr %90, i64 %188 + %192 = load i8, ptr %191, align 1, !tbaa !96 + %192 = getelementptr inbounds nuw [4 x i8], ptr %6, i64 0, i64 %indvars.iv.i81 + store i8 %192, ptr %192, align 1, !tbaa !96 + %194 = add nuw nsw i64 %indvars.iv.i82, 1 + %.not.i83 = icmp eq i64 %194, 4 + br i1 %.not.i83, label %181, label %186, !llvm.loop !118 default.unreachable: ; preds = %_ZN8rawspeed12_GLOBAL__N_116decodeDeltaBytesENS_10Array1DRefIhEEiii.exit unreachable diff --git a/bench/darktable/optimized/IiqDecoder.ll b/bench/darktable/optimized/IiqDecoder.ll index aa02c57db45..4c136976fdd 100644 --- a/bench/darktable/optimized/IiqDecoder.ll +++ b/bench/darktable/optimized/IiqDecoder.ll @@ -4324,27 +4324,27 @@ define hidden void @_ZNK8rawspeed10IiqDecoder16correctBadColumnEt(ptr noundef no %16 = getelementptr inbounds nuw i8, ptr %6, i64 48 %17 = load i32, ptr %16, align 8, !tbaa !220, !noalias !311 %18 = ashr i32 %17, 1 - %19 = mul nuw nsw i32 %18, %15 - %20 = icmp sgt i32 %13, -1 + %19 = icmp sgt i32 %13, -1 + tail call void @llvm.assume(i1 %19) + %20 = icmp sgt i32 %15, -1 tail call void @llvm.assume(i1 %20) - %21 = icmp sgt i32 %15, -1 + %21 = icmp ugt i32 %17, 1 tail call void @llvm.assume(i1 %21) - %22 = icmp ugt i32 %17, 1 + %22 = icmp sgt i32 %18, -1 tail call void @llvm.assume(i1 %22) - %23 = icmp sgt i32 %18, -1 + %23 = icmp samesign uge i32 %18, %13 tail call void @llvm.assume(i1 %23) - %24 = icmp samesign uge i32 %18, %13 - tail call void @llvm.assume(i1 %24) - %25 = icmp eq i32 %13, 0 - %26 = icmp ne i32 %15, 0 - %27 = xor i1 %25, %26 - tail call void @llvm.assume(i1 %27) - %28 = getelementptr inbounds nuw i8, ptr %6, i64 44 - %29 = load i32, ptr %28, align 4, !tbaa !229 - %30 = icmp sgt i32 %29, 4 - br i1 %30, label %.lr.ph, label %._crit_edge + %24 = icmp eq i32 %13, 0 + %25 = icmp ne i32 %15, 0 + %25 = xor i1 %24, %25 + tail call void @llvm.assume(i1 %25) + %27 = getelementptr inbounds nuw i8, ptr %6, i64 44 + %28 = load i32, ptr %27, align 4, !tbaa !229 + %28 = icmp sgt i32 %28, 4 + br i1 %28, label %.lr.ph, label %._crit_edge .lr.ph: ; preds = %2 + %30 = mul nuw nsw i32 %18, %15 %31 = zext i16 %1 to i32 %32 = add nsw i32 %31, -2 %33 = icmp samesign ult i32 %32, %13 @@ -4366,10 +4366,13 @@ define hidden void @_ZNK8rawspeed10IiqDecoder16correctBadColumnEt(ptr noundef no %invariant.gep = getelementptr inbounds nuw i16, ptr %8, i64 %39 %49 = zext nneg i32 %18 to i64 %50 = zext nneg i32 %13 to i64 - %51 = zext nneg i32 %19 to i64 + %51 = zext nneg i32 %30 to i64 %52 = zext nneg i32 %15 to i64 %53 = zext nneg i32 %18 to i64 - br label %54 + %54 = zext nneg i32 %13 to i64 + %55 = zext nneg i32 %30 to i64 + %56 = zext nneg i32 %15 to i64 + br label %57 ._crit_edge: ; preds = %161, %2 ret void @@ -4391,35 +4394,34 @@ define hidden void @_ZNK8rawspeed10IiqDecoder16correctBadColumnEt(ptr noundef no %62 = icmp samesign ult i64 %61, %52 tail call void @llvm.assume(i1 %62) %63 = mul nuw nsw i64 %61, %53 - %64 = trunc i64 %63 to i32 - %65 = add i32 %13, %64 - %66 = icmp samesign ule i32 %65, %19 - tail call void @llvm.assume(i1 %66) - %67 = getelementptr inbounds nuw i16, ptr %8, i64 %63 - %68 = getelementptr inbounds nuw i16, ptr %67, i64 %42 - %69 = load i16, ptr %68, align 2, !tbaa !221 - store i16 %69, ptr %3, align 2, !tbaa !221 - %70 = zext i16 %69 to i32 - %71 = add nuw nsw i64 %indvars.iv436, 1 - %72 = icmp samesign ult i64 %71, %52 - tail call void @llvm.assume(i1 %72) - %73 = mul nuw nsw i64 %71, %49 - %74 = add nuw nsw i64 %73, %50 - %75 = icmp samesign ule i64 %74, %51 - tail call void @llvm.assume(i1 %75) - %76 = getelementptr inbounds nuw i16, ptr %8, i64 %73 - %77 = getelementptr inbounds nuw i16, ptr %76, i64 %42 - %78 = load i16, ptr %77, align 2, !tbaa !221 - store i16 %78, ptr %43, align 2, !tbaa !221 - %79 = zext i16 %78 to i32 - %80 = add nuw nsw i32 %79, %70 + %64 = add nuw nsw i64 %63, %54 + %65 = icmp samesign ule i64 %64, %55 + tail call void @llvm.assume(i1 %65) + %69 = getelementptr inbounds nuw i16, ptr %8, i64 %66 + %67 = getelementptr inbounds nuw i16, ptr %69, i64 %42 + %68 = load i16, ptr %67, align 2, !tbaa !221 + store i16 %71, ptr %3, align 2, !tbaa !221 + %72 = zext i16 %68 to i32 + %70 = add nuw nsw i64 %indvars.iv436, 1 + %71 = icmp samesign ult i64 %73, %56 + tail call void @llvm.assume(i1 %71) + %75 = mul nuw nsw i64 %73, %49 + %73 = add nuw nsw i64 %75, %50 + %74 = icmp samesign ule i64 %73, %51 + tail call void @llvm.assume(i1 %74) + %78 = getelementptr inbounds nuw i16, ptr %8, i64 %75 + %76 = getelementptr inbounds nuw i16, ptr %78, i64 %42 + %77 = load i16, ptr %76, align 2, !tbaa !221 + store i16 %80, ptr %43, align 2, !tbaa !221 + %81 = zext i16 %80 to i32 + %79 = add nuw nsw i32 %81, %72 tail call void @llvm.assume(i1 %45) - %81 = getelementptr inbounds nuw i16, ptr %67, i64 %46 + %81 = getelementptr inbounds nuw i16, ptr %69, i64 %46 %82 = load i16, ptr %81, align 2, !tbaa !221 store i16 %82, ptr %47, align 2, !tbaa !221 %83 = zext i16 %82 to i32 - %84 = add nuw nsw i32 %80, %83 - %85 = getelementptr inbounds nuw i16, ptr %76, i64 %46 + %84 = add nuw nsw i32 %82, %83 + %85 = getelementptr inbounds nuw i16, ptr %78, i64 %46 %86 = load i16, ptr %85, align 2, !tbaa !221 store i16 %86, ptr %48, align 2, !tbaa !221 %87 = zext i16 %86 to i32 @@ -4470,7 +4472,7 @@ define hidden void @_ZNK8rawspeed10IiqDecoder16correctBadColumnEt(ptr noundef no 114: ; preds = %54 %115 = add nuw nsw i64 %indvars.iv436, 2 tail call void @llvm.assume(i1 %33) - %116 = icmp samesign ult i64 %115, %52 + %116 = icmp samesign ult i64 %115, %56 tail call void @llvm.assume(i1 %116) %117 = mul nuw nsw i64 %115, %49 %118 = add nuw nsw i64 %117, %50 @@ -4484,21 +4486,20 @@ define hidden void @_ZNK8rawspeed10IiqDecoder16correctBadColumnEt(ptr noundef no %125 = icmp samesign ult i64 %124, %52 tail call void @llvm.assume(i1 %125) %126 = mul nuw nsw i64 %124, %53 - %127 = trunc i64 %126 to i32 - %128 = add i32 %13, %127 - %129 = icmp samesign ule i32 %128, %19 - tail call void @llvm.assume(i1 %129) - %130 = getelementptr inbounds nuw i16, ptr %8, i64 %126 - %131 = getelementptr inbounds nuw i16, ptr %130, i64 %34 - %132 = load i16, ptr %131, align 2, !tbaa !221 - %133 = zext i16 %132 to i32 - %134 = add nuw nsw i32 %133, %123 + %127 = add nuw nsw i64 %126, %54 + %128 = icmp samesign ule i64 %127, %55 + tail call void @llvm.assume(i1 %128) + %131 = getelementptr inbounds nuw i16, ptr %8, i64 %128 + %130 = getelementptr inbounds nuw i16, ptr %131, i64 %34 + %131 = load i16, ptr %130, align 2, !tbaa !221 + %132 = zext i16 %131 to i32 + %133 = add nuw nsw i32 %132, %125 tail call void @llvm.assume(i1 %36) %135 = getelementptr inbounds nuw i16, ptr %120, i64 %37 %136 = load i16, ptr %135, align 2, !tbaa !221 %137 = zext i16 %136 to i32 - %138 = add nuw nsw i32 %134, %137 - %139 = getelementptr inbounds nuw i16, ptr %130, i64 %37 + %138 = add nuw nsw i32 %135, %137 + %139 = getelementptr inbounds nuw i16, ptr %131, i64 %37 %140 = load i16, ptr %139, align 2, !tbaa !221 %141 = zext i16 %140 to i32 %142 = add nuw nsw i32 %138, %141 @@ -4527,7 +4528,7 @@ define hidden void @_ZNK8rawspeed10IiqDecoder16correctBadColumnEt(ptr noundef no br label %161 161: ; preds = %89, %114 - %indvars.iv.next437.pre-phi = phi i64 [ %71, %89 ], [ %.pre, %114 ] + %indvars.iv.next437.pre-phi = phi i64 [ %73, %89 ], [ %.pre, %114 ] %162 = load ptr, ptr %5, align 8, !tbaa !110 %163 = getelementptr inbounds nuw i8, ptr %162, i64 44 %164 = load i32, ptr %163, align 4, !tbaa !229 diff --git a/bench/darktable/optimized/OlympusDecompressor.ll b/bench/darktable/optimized/OlympusDecompressor.ll index 938dcd7eb22..405ecdaeb29 100644 --- a/bench/darktable/optimized/OlympusDecompressor.ll +++ b/bench/darktable/optimized/OlympusDecompressor.ll @@ -497,19 +497,17 @@ _ZN8rawspeed14BitStreamerMSBCI2NS_11BitStreamerIS0_NS_39BitStreamerForwardSequen call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(12) %scevgep.i29.i.i, i8 0, i64 12, i1 false), !tbaa !92 %117 = icmp samesign ult i64 %indvars.iv.i, 2 %118 = add nsw i64 %indvars.iv.i, -2 - %119 = trunc nsw i64 %118 to i32 - %120 = icmp samesign ugt i32 %94, %119 - %121 = mul nuw nsw i64 %118, %113 - %122 = trunc i64 %121 to i32 - %123 = add i32 %92, %122 - %124 = icmp samesign ule i32 %123, %98 - %125 = getelementptr inbounds nuw i16, ptr %87, i64 %121 - %126 = icmp samesign ult i64 %indvars.iv.i, %115 - %127 = mul nuw nsw i64 %indvars.iv.i, %113 - %128 = add nuw nsw i64 %127, %111 - %129 = icmp samesign ule i64 %128, %114 - %130 = getelementptr inbounds nuw i16, ptr %87, i64 %127 - br label %131 + %119 = icmp samesign ult i64 %118, %115 + %120 = mul nuw nsw i64 %118, %113 + %121 = add nuw nsw i64 %120, %111 + %122 = icmp samesign ule i64 %121, %114 + %123 = getelementptr inbounds nuw i16, ptr %87, i64 %120 + %124 = icmp samesign ult i64 %indvars.iv.i, %115 + %125 = mul nuw nsw i64 %indvars.iv.i, %113 + %126 = add nuw nsw i64 %125, %111 + %127 = icmp samesign ule i64 %126, %114 + %128 = getelementptr inbounds nuw i16, ptr %87, i64 %125 + br label %129 _ZNK8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl15decompressGroupERSt5arrayINS0_24OlympusDifferenceDecoderELm2EERNS_14BitStreamerMSBEii.exit.preheader.i.i: ; preds = %_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit25.i.i br i1 %.not254.i.i, label %_ZNK8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl13decompressRowERNS_14BitStreamerMSBEi.exit.i, label %.preheader.i.i @@ -645,40 +643,40 @@ _ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMS %204 = xor i32 %203, %170 %205 = getelementptr inbounds nuw i8, ptr %134, i64 12 %206 = load i32, ptr %205, align 4, !tbaa !92 + %205 = add nsw i32 %204, %202 + %206 = mul nsw i32 %205, 3 %207 = add nsw i32 %206, %204 - %208 = mul nsw i32 %207, 3 - %209 = add nsw i32 %208, %206 - %210 = ashr i32 %209, 5 - store i32 %210, ptr %205, align 4, !tbaa !92 - %211 = icmp sgt i32 %203, 16 - %212 = add nsw i32 %160, 1 - %spec.select.i.i = select i1 %211, i32 0, i32 %212 - %213 = lshr i32 %168, 12 - %214 = and i32 %213, 3 - store i32 %spec.select.i.i, ptr %159, align 8, !tbaa !92 - %215 = shl nsw i32 %207, 2 - %216 = or disjoint i32 %215, %214 - br i1 %117, label %_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit25.i.i, label %217 - -217: ; preds = %_ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMSBE.exit18.i.i - %218 = icmp samesign ult i64 %indvars.iv.i.i, %111 - call void @llvm.assume(i1 %218) - call void @llvm.assume(i1 %120) - call void @llvm.assume(i1 %124) - %219 = getelementptr inbounds nuw i16, ptr %125, i64 %indvars.iv.i.i - %220 = load i16, ptr %219, align 2, !tbaa !115 - %221 = zext i16 %220 to i32 + %208 = ashr i32 %207, 5 + store i32 %208, ptr %203, align 4, !tbaa !92 + %210 = icmp sgt i32 %201, 16 + %210 = add nsw i32 %160, 1 + %211 = select i1 %209, i32 0, i32 %210 + %212 = lshr i32 %166, 12 + %spec.select.i.i = and i32 %212, 3 + store i32 %211, ptr %157, align 8, !tbaa !92 + %214 = shl nsw i32 %205, 2 + %214 = or disjoint i32 %213, %spec.select.i.i + br i1 %117, label %_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit25.i.i, label %215 + +215: ; preds = %_ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMSBE.exit18.i.i + %216 = icmp samesign ult i64 %indvars.iv.i.i, %111 + call void @llvm.assume(i1 %216) + call void @llvm.assume(i1 %119) + call void @llvm.assume(i1 %122) + %217 = getelementptr inbounds nuw i16, ptr %123, i64 %indvars.iv.i.i + %218 = load i16, ptr %217, align 2, !tbaa !115 + %219 = zext i16 %218 to i32 br label %_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit25.i.i -_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit25.i.i: ; preds = %217, %_ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMSBE.exit18.i.i - %.0.i23.i.i = phi i32 [ %221, %217 ], [ 0, %_ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMSBE.exit18.i.i ] - %222 = add nsw i32 %216, %.0.i23.i.i +_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit25.i.i: ; preds = %215, %_ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMSBE.exit18.i.i + %.0.i23.i.i = phi i32 [ %219, %215 ], [ 0, %_ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMSBE.exit18.i.i ] + %222 = add nsw i32 %214, %.0.i23.i.i %223 = trunc i32 %222 to i16 %224 = icmp samesign ult i64 %indvars.iv.i.i, %111 call void @llvm.assume(i1 %224) - call void @llvm.assume(i1 %126) - call void @llvm.assume(i1 %129) - %225 = getelementptr inbounds nuw i16, ptr %130, i64 %indvars.iv.i.i + call void @llvm.assume(i1 %124) + call void @llvm.assume(i1 %127) + %225 = getelementptr inbounds nuw i16, ptr %128, i64 %indvars.iv.i.i store i16 %223, ptr %225, align 2, !tbaa !115 %indvars.iv.next.i.i = add nuw nsw i64 %indvars.iv.i.i, 1 %.not.i.i10.i = icmp eq i64 %indvars.iv.next.i.i, 2 @@ -836,78 +834,78 @@ _ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMS %304 = xor i32 %303, %270 %305 = getelementptr inbounds nuw i8, ptr %231, i64 12 %306 = load i32, ptr %305, align 4, !tbaa !92 + %305 = add nsw i32 %304, %302 + %306 = mul nsw i32 %305, 3 %307 = add nsw i32 %306, %304 - %308 = mul nsw i32 %307, 3 - %309 = add nsw i32 %308, %306 - %310 = ashr i32 %309, 5 - store i32 %310, ptr %305, align 4, !tbaa !92 - %311 = icmp sgt i32 %303, 16 - %312 = add nsw i32 %260, 1 - %spec.select234.i.i = select i1 %311, i32 0, i32 %312 - %313 = lshr i32 %268, 12 - %314 = and i32 %313, 3 - store i32 %spec.select234.i.i, ptr %259, align 8, !tbaa !92 - %315 = shl nsw i32 %307, 2 - %316 = or disjoint i32 %315, %314 - %317 = add nsw i64 %230, -2 - %318 = icmp samesign ult i64 %317, %111 - call void @llvm.assume(i1 %318) - %319 = getelementptr inbounds nuw i16, ptr %130, i64 %317 - %320 = load i16, ptr %319, align 2, !tbaa !115 - %321 = zext i16 %320 to i32 - br i1 %117, label %_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit.i.i, label %322 - -322: ; preds = %_ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMSBE.exit.i.i - %323 = icmp samesign ult i64 %230, %111 - call void @llvm.assume(i1 %323) - call void @llvm.assume(i1 %120) - call void @llvm.assume(i1 %124) - %324 = getelementptr inbounds nuw i16, ptr %125, i64 %230 - %325 = load i16, ptr %324, align 2, !tbaa !115 - %326 = zext i16 %325 to i32 - %327 = getelementptr inbounds nuw i16, ptr %125, i64 %317 - %328 = load i16, ptr %327, align 2, !tbaa !115 - %329 = zext i16 %328 to i32 - %330 = sub nsw i32 %321, %329 - %331 = sub nsw i32 %326, %329 - %332 = xor i32 %330, %331 - %333 = icmp slt i32 %332, 0 - %334 = icmp ne i16 %320, %328 + %308 = ashr i32 %307, 5 + store i32 %308, ptr %303, align 4, !tbaa !92 + %310 = icmp sgt i32 %301, 16 + %310 = add nsw i32 %260, 1 + %311 = select i1 %309, i32 0, i32 %310 + %312 = lshr i32 %266, 12 + %spec.select234.i.i = and i32 %312, 3 + store i32 %311, ptr %257, align 8, !tbaa !92 + %314 = shl nsw i32 %305, 2 + %314 = or disjoint i32 %313, %spec.select234.i.i + %315 = add nsw i64 %230, -2 + %316 = icmp samesign ult i64 %315, %111 + call void @llvm.assume(i1 %316) + %318 = getelementptr inbounds nuw i16, ptr %128, i64 %315 + %318 = load i16, ptr %317, align 2, !tbaa !115 + %319 = zext i16 %318 to i32 + br i1 %117, label %_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit.i.i, label %320 + +320: ; preds = %_ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMSBE.exit.i.i + %321 = icmp samesign ult i64 %228, %111 + call void @llvm.assume(i1 %321) + call void @llvm.assume(i1 %119) + call void @llvm.assume(i1 %122) + %322 = getelementptr inbounds nuw i16, ptr %123, i64 %230 + %323 = load i16, ptr %322, align 2, !tbaa !115 + %324 = zext i16 %323 to i32 + %325 = getelementptr inbounds nuw i16, ptr %123, i64 %315 + %326 = load i16, ptr %325, align 2, !tbaa !115 + %327 = zext i16 %326 to i32 + %328 = sub nsw i32 %319, %327 + %329 = sub nsw i32 %324, %327 + %330 = xor i32 %328, %329 + %331 = icmp slt i32 %330, 0 + %332 = icmp ne i16 %318, %326 + %333 = and i1 %332, %331 + %334 = icmp ne i16 %323, %326 %or.cond3.i.i.i = and i1 %334, %333 - %335 = icmp ne i16 %325, %328 - %or.cond5.i.i.i = and i1 %335, %or.cond3.i.i.i - %336 = call i32 @llvm.abs.i32(i32 %330, i1 true) - br i1 %or.cond5.i.i.i, label %337, label %346 - -337: ; preds = %322 - %338 = icmp samesign ugt i32 %336, 32 - %339 = call i32 @llvm.abs.i32(i32 %331, i1 true) - %340 = icmp samesign ugt i32 %339, 32 - %or.cond27.i.i.i = select i1 %338, i1 true, i1 %340 - br i1 %or.cond27.i.i.i, label %341, label %343 - -341: ; preds = %337 - %342 = add nsw i32 %331, %321 + %335 = call i32 @llvm.abs.i32(i32 %328, i1 true) + br i1 %or.cond5.i.i.i, label %335, label %344 + +335:; preds = %320 + %336 = icmp samesign ugt i32 %334, 32 + %337 = call i32 @llvm.abs.i32(i32 %329, i1 true) + %338 = icmp samesign ugt i32 %337, 32 + %339 = select i1 %336, i1 true, i1 %338 + br i1 %or.cond27.i.i.i, label %339, label %341 + +339: ; preds = %335 + %340 = add nsw i32 %329, %319 br label %_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit.i.i -343: ; preds = %337 - %344 = add nuw nsw i32 %326, %321 +343: ; preds = %335 + %344 = add nuw nsw i32 %324, %319 %345 = lshr i32 %344, 1 br label %_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit.i.i -346: ; preds = %322 - %347 = call i32 @llvm.abs.i32(i32 %331, i1 true) - %348 = icmp samesign ugt i32 %336, %347 - %349 = select i1 %348, i32 %321, i32 %326 +346: ; preds = %320 + %347 = call i32 @llvm.abs.i32(i32 %329, i1 true) + %348 = icmp samesign ugt i32 %334, %347 + %349 = select i1 %348, i32 %319, i32 %324 br label %_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit.i.i -_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit.i.i: ; preds = %346, %343, %341, %_ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMSBE.exit.i.i - %.0.i19.i.i = phi i32 [ %342, %341 ], [ %345, %343 ], [ %349, %346 ], [ %321, %_ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMSBE.exit.i.i ] - %350 = add nsw i32 %316, %.0.i19.i.i +_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImpl7getPredENS_10Array2DRefItEEii.exit.i.i: ; preds = %346, %343, %339, %_ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMSBE.exit.i.i + %.0.i19.i.i = phi i32 [ %340, %339 ], [ %345, %343 ], [ %349, %346 ], [ %319, %_ZN8rawspeed12_GLOBAL__N_124OlympusDifferenceDecoder7getDiffERNS_14BitStreamerMSBE.exit.i.i ] + %350 = add nsw i32 %314, %.0.i19.i.i %351 = trunc i32 %350 to i16 %352 = icmp samesign ult i64 %230, %111 call void @llvm.assume(i1 %352) - %353 = getelementptr inbounds nuw i16, ptr %130, i64 %230 + %353 = getelementptr inbounds nuw i16, ptr %128, i64 %230 store i16 %351, ptr %353, align 2, !tbaa !115 %indvars.iv.next265.i.i = add nuw nsw i64 %indvars.iv264.i.i, 1 %.not.i11.i.i = icmp eq i64 %indvars.iv.next265.i.i, 2 @@ -959,11 +957,11 @@ _ZN8rawspeed9SimpleLUTIaLi12EED2Ev.exit.i: ; preds = %355, %_ZNK8rawspeed %368 = load ptr, ptr %360, align 8, !tbaa !89 %369 = getelementptr inbounds nuw i8, ptr %368, i64 16 %370 = load ptr, ptr %369, align 8 - call void %370(ptr noundef nonnull align 8 dereferenceable(16) %360) #20 + call void %368(ptr noundef nonnull align 8 dereferenceable(16) %360) #20 %371 = load ptr, ptr %360, align 8, !tbaa !89 %372 = getelementptr inbounds nuw i8, ptr %371, i64 24 %373 = load ptr, ptr %372, align 8 - call void %373(ptr noundef nonnull align 8 dereferenceable(16) %360) #20 + call void %371(ptr noundef nonnull align 8 dereferenceable(16) %360) #20 br label %_ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImplD2Ev.exit 374: ; preds = %361 @@ -1008,11 +1006,11 @@ _ZN8rawspeed12_GLOBAL__N_123OlympusDecompressorImplD2Ev.exit: ; preds = %_ZN8raw %390 = load ptr, ptr %382, align 8, !tbaa !89 %391 = getelementptr inbounds nuw i8, ptr %390, i64 16 %392 = load ptr, ptr %391, align 8 - call void %392(ptr noundef nonnull align 8 dereferenceable(16) %382) #20 + call void %390(ptr noundef nonnull align 8 dereferenceable(16) %382) #20 %393 = load ptr, ptr %382, align 8, !tbaa !89 %394 = getelementptr inbounds nuw i8, ptr %393, i64 24 %395 = load ptr, ptr %394, align 8 - call void %395(ptr noundef nonnull align 8 dereferenceable(16) %382) #20 + call void %393(ptr noundef nonnull align 8 dereferenceable(16) %382) #20 br label %_ZN8rawspeed8RawImageD2Ev.exit 396: ; preds = %383 diff --git a/bench/darktable/optimized/PentaxDecompressor.ll b/bench/darktable/optimized/PentaxDecompressor.ll index fc31a096630..16c35d6c6bc 100644 --- a/bench/darktable/optimized/PentaxDecompressor.ll +++ b/bench/darktable/optimized/PentaxDecompressor.ll @@ -1923,7 +1923,10 @@ _ZN8rawspeed14BitStreamerMSBCI2NS_11BitStreamerIS0_NS_39BitStreamerForwardSequen %55 = zext nneg i32 %16 to i64 %56 = zext nneg i32 %11 to i64 %57 = zext nneg i32 %17 to i64 - %58 = zext nneg i32 %16 to i64 + %58 = zext nneg i32 %13 to i64 + %59 = zext nneg i32 %16 to i64 + %60 = zext nneg i32 %11 to i64 + %61 = zext nneg i32 %17 to i64 %wide.trip.count241 = zext nneg i32 %13 to i64 br label %60 @@ -1942,60 +1945,58 @@ _ZN8rawspeed14BitStreamerMSBCI2NS_11BitStreamerIS0_NS_39BitStreamerForwardSequen call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %3) #20 store i64 0, ptr %3, align 8 %61 = icmp samesign ugt i64 %indvars.iv238, 1 - br i1 %61, label %62, label %76 + br i1 %61, label %62, label %77 62: ; preds = %60 %63 = add nsw i64 %indvars.iv238, -2 - %64 = trunc nsw i64 %63 to i32 - %65 = icmp samesign ugt i32 %13, %64 - tail call void @llvm.assume(i1 %65) - %66 = mul nuw nsw i64 %63, %58 - %67 = trunc i64 %66 to i32 - %68 = add i32 %11, %67 - %69 = icmp samesign ule i32 %68, %17 - tail call void @llvm.assume(i1 %69) - %70 = getelementptr inbounds nuw i16, ptr %6, i64 %66 - %71 = load i16, ptr %70, align 2, !tbaa !168 - %72 = zext i16 %71 to i32 - %73 = getelementptr inbounds nuw i8, ptr %70, i64 2 - %74 = load i16, ptr %73, align 2, !tbaa !168 - %75 = zext i16 %74 to i32 - store i32 %72, ptr %3, align 8 - store i32 %75, ptr %.sroa.4.0..sroa_idx, align 4, !tbaa !131 - br label %76 - -76: ; preds = %62, %60 - %77 = load ptr, ptr %48, align 8 - %78 = load ptr, ptr %50, align 8 - %79 = load ptr, ptr %49, align 8 - %80 = ptrtoint ptr %78 to i64 + %64 = icmp samesign ult i64 %63, %58 + tail call void @llvm.assume(i1 %64) + %68 = mul nuw nsw i64 %66, %59 + %66 = add nuw nsw i64 %68, %60 + %67 = icmp samesign ule i64 %66, %61 + tail call void @llvm.assume(i1 %67) + %69 = getelementptr inbounds nuw i16, ptr %6, i64 %68 + %72 = load i16, ptr %69, align 2, !tbaa !168 + %70 = zext i16 %72 to i32 + %71 = getelementptr inbounds nuw i8, ptr %71, i64 2 + %72 = load i16, ptr %71, align 2, !tbaa !168 + %73 = zext i16 %75 to i32 + store i32 %73, ptr %3, align 8 + store i32 %76, ptr %.sroa.4.0..sroa_idx, align 4, !tbaa !131 + br label %77 + +77:; preds = %65, %63 + %78 = load ptr, ptr %48, align 8 + %79 = load ptr, ptr %50, align 8 + %77 = load ptr, ptr %49, align 8 %81 = ptrtoint ptr %79 to i64 - %82 = sub i64 %80, %81 - %83 = ashr exact i64 %82, 2 - %84 = add nsw i64 %83, -1 - %85 = icmp ugt i64 %84, 11 - %86 = load ptr, ptr %51, align 8 - %87 = load ptr, ptr %52, align 8 - %88 = load ptr, ptr %53, align 8 - %89 = load i8, ptr %54, align 1, !range !91 - %90 = trunc nuw i8 %89 to i1 - %91 = mul nuw nsw i64 %indvars.iv238, %55 - %92 = add nuw nsw i64 %91, %56 - %93 = icmp samesign ule i64 %92, %57 - %94 = getelementptr inbounds nuw i16, ptr %6, i64 %91 - br label %95 - -_ZN8rawspeed14BitStreamerMSBCI2NS_11BitStreamerIS0_NS_39BitStreamerForwardSequentialReplenisherIS0_EEEEENS_10Array1DRefIKSt4byteEE.exit: ; preds = %197 + %82 = ptrtoint ptr %80 to i64 + %83 = sub i64 %81, %82 + %84 = ashr exact i64 %83, 2 + %85 = add nsw i64 %84, -1 + %86 = icmp ugt i64 %85, 11 + %87 = load ptr, ptr %51, align 8 + %88 = load ptr, ptr %52, align 8 + %89 = load ptr, ptr %53, align 8 + %90 = load i8, ptr %54, align 1, !range !91 + %91 = trunc nuw i8 %90 to i1 + %92 = mul nuw nsw i64 %indvars.iv238, %55 + %93 = add nuw nsw i64 %92, %56 + %94 = icmp samesign ule i64 %93, %57 + %95 = getelementptr inbounds nuw i16, ptr %6, i64 %92 + br label %96 + +_ZN8rawspeed14BitStreamerMSBCI2NS_11BitStreamerIS0_NS_39BitStreamerForwardSequentialReplenisherIS0_EEEEENS_10Array1DRefIKSt4byteEE.exit: ; preds = %198 call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %3) #20 %indvars.iv.next239 = add nuw nsw i64 %indvars.iv238, 1 %exitcond242.not = icmp eq i64 %indvars.iv.next239, %wide.trip.count241 br i1 %exitcond242.not, label %_ZN8rawspeed14BitStreamerMSBCI2NS_11BitStreamerIS0_NS_39BitStreamerForwardSequentialReplenisherIS0_EEEEENS_10Array1DRefIKSt4byteEE.exit._crit_edge, label %60, !llvm.loop !169 -95: ; preds = %76, %197 - %indvars.iv = phi i64 [ 0, %76 ], [ %indvars.iv.next, %197 ] - %.sroa.4663.1221 = phi i32 [ %.sroa.4663.0225, %76 ], [ %.sroa.4663.2, %197 ] - %.sroa.16.1220 = phi i32 [ %.sroa.16.0224, %76 ], [ %.sroa.16.3, %197 ] - %.sroa.037.1219 = phi i64 [ %.sroa.037.0223, %76 ], [ %.sroa.037.3, %197 ] +95: ; preds = %77, %197 + %indvars.iv = phi i64 [ 0, %77 ], [ %indvars.iv.next, %197 ] + %.sroa.4663.1221 = phi i32 [ %.sroa.4663.0225, %77 ], [ %.sroa.4663.2, %197 ] + %.sroa.16.1220 = phi i32 [ %.sroa.16.0224, %77 ], [ %.sroa.16.3, %197 ] + %.sroa.037.1219 = phi i64 [ %.sroa.037.0223, %77 ], [ %.sroa.037.3, %197 ] %96 = icmp samesign ult i32 %.sroa.16.1220, 65 tail call void @llvm.assume(i1 %96) %.not.i31 = icmp samesign ult i32 %.sroa.16.1220, 32 @@ -2052,7 +2053,7 @@ _ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialR %.sroa.16.4 = phi i32 [ %114, %_ZN8rawspeed39BitStreamerForwardSequentialReplenisherINS_14BitStreamerMSBEE8getInputEv.exit.i ], [ %.sroa.16.1220, %95 ] %.sroa.4663.2 = phi i32 [ %98, %_ZN8rawspeed39BitStreamerForwardSequentialReplenisherINS_14BitStreamerMSBEE8getInputEv.exit.i ], [ %.sroa.4663.1221, %95 ] %119 = lshr i64 %.sroa.037.4, 53 - %120 = getelementptr inbounds nuw i32, ptr %77, i64 %119 + %120 = getelementptr inbounds nuw i32, ptr %78, i64 %119 %121 = load i32, ptr %120, align 4, !tbaa !132 %122 = ashr i32 %121, 9 %123 = and i32 %121, 255 @@ -2067,105 +2068,105 @@ _ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialR 129: ; preds = %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit %.not17.i = icmp eq i32 %121, 0 - br i1 %.not17.i, label %134, label %130 + br i1 %.not17.i, label %135, label %130 130: ; preds = %129 - %131 = trunc i32 %121 to i8 %132 = trunc i32 %122 to i8 - %133 = icmp ne i8 %132, 0 - tail call void @llvm.assume(i1 %133) - br label %169 - -134: ; preds = %129 - %135 = icmp samesign ugt i32 %125, 10 - tail call void @llvm.assume(i1 %135) - %136 = add nsw i32 %125, -11 - %137 = shl i64 %127, 11 - %138 = trunc nuw nsw i64 %119 to i32 - %.sroa.0.018.i = trunc nuw nsw i64 %119 to i16 - br i1 %85, label %.lr.ph.i, label %.critedge.i - -.lr.ph.i: ; preds = %134, %.critedge2.i - %.sroa.037.6 = phi i64 [ %149, %.critedge2.i ], [ %137, %134 ] - %.sroa.16.6 = phi i32 [ %148, %.critedge2.i ], [ %136, %134 ] - %139 = phi i64 [ %154, %.critedge2.i ], [ 11, %134 ] - %.sroa.0.021.i = phi i16 [ %.sroa.0.0.i, %.critedge2.i ], [ %.sroa.0.018.i, %134 ] - %.sroa.8.020.i = phi i8 [ %153, %.critedge2.i ], [ 11, %134 ] - %.sroa.0.0.in19.i = phi i32 [ %152, %.critedge2.i ], [ %138, %134 ] - %140 = getelementptr inbounds nuw i16, ptr %86, i64 %139 - %141 = load i16, ptr %140, align 2, !tbaa !168 - %142 = icmp eq i16 %141, -1 - %143 = icmp ult i16 %141, %.sroa.0.021.i - %or.cond.i = select i1 %142, i1 true, i1 %143 + %133 = trunc i32 %123 to i8 + %134 = icmp ne i8 %133, 0 + tail call void @llvm.assume(i1 %134) + br label %170 + +135:; preds = %130 + %136 = icmp samesign ugt i32 %125, 10 + tail call void @llvm.assume(i1 %136) + %137 = add nsw i32 %126, -11 + %138 = shl i64 %128, 11 + %.sroa.0.018.i = trunc nuw nsw i64 %119 to i32 + %.sroa.0.018.i = trunc nuw nsw i64 %120 to i16 + br i1 %86, label %.lr.ph.i, label %.critedge.i + +.lr.ph.i:; preds = %135, %.critedge2.i + %.sroa.16.6 = phi i64 [ %150, %.critedge2.i ], [ %138, %135 ] + %139 = phi i32 [ %149, %.critedge2.i ], [ %137, %135 ] + %.sroa.0.021.i = phi i64 [ %155, %.critedge2.i ], [ 11, %135 ] + %.sroa.8.020.i = phi i16 [ %155, %.critedge2.i ], [ %.sroa.0.018.i, %135 ] + %.sroa.0.0.in19.i = phi i8 [ %154, %.critedge2.i ], [ 11, %135 ] + %140 = phi i32 [ %153, %.critedge2.i ], [ %139, %135 ] + %141 = getelementptr inbounds nuw i16, ptr %87, i64 %140 + %142 = load i16, ptr %141, align 2, !tbaa !168 + %143 = icmp eq i16 %142, -1 + %or.cond.i = icmp ult i16 %142, %.sroa.0.021.i + %or.cond.i = select i1 %143, i1 true, i1 %144 br i1 %or.cond.i, label %.critedge2.i, label %.critedge.i .critedge2.i: ; preds = %.lr.ph.i %144 = icmp samesign ult i32 %.sroa.16.6, 65 - tail call void @llvm.assume(i1 %144) - %145 = icmp ne i32 %.sroa.16.6, 0 tail call void @llvm.assume(i1 %145) - %146 = lshr i64 %.sroa.037.6, 63 - %147 = trunc nuw nsw i64 %146 to i32 - %148 = add nsw i32 %.sroa.16.6, -1 - %149 = shl i64 %.sroa.037.6, 1 - %150 = shl nsw i32 %.sroa.0.0.in19.i, 1 - %151 = and i32 %150, 131070 - %152 = or disjoint i32 %151, %147 - %153 = add i8 %.sroa.8.020.i, 1 - %.sroa.0.0.i = trunc i32 %152 to i16 - %154 = zext i8 %153 to i64 - %155 = icmp ugt i64 %84, %154 - br i1 %155, label %.lr.ph.i, label %.critedge.i, !llvm.loop !170 - -.critedge.i: ; preds = %.critedge2.i, %.lr.ph.i, %134 - %.sroa.037.5 = phi i64 [ %137, %134 ], [ %.sroa.037.6, %.lr.ph.i ], [ %149, %.critedge2.i ] - %.sroa.16.5 = phi i32 [ %136, %134 ], [ %.sroa.16.6, %.lr.ph.i ], [ %148, %.critedge2.i ] - %.sroa.0.0.in.lcssa.i = phi i32 [ %138, %134 ], [ %.sroa.0.0.in19.i, %.lr.ph.i ], [ %152, %.critedge2.i ] - %.sroa.8.0.lcssa.i = phi i8 [ 11, %134 ], [ %.sroa.8.020.i, %.lr.ph.i ], [ %153, %.critedge2.i ] - %.sroa.0.0.lcssa.i = phi i16 [ %.sroa.0.018.i, %134 ], [ %.sroa.0.021.i, %.lr.ph.i ], [ %.sroa.0.0.i, %.critedge2.i ] - %.lcssa17.i = phi i64 [ 11, %134 ], [ %139, %.lr.ph.i ], [ %154, %.critedge2.i ] - %156 = icmp ult i64 %84, %.lcssa17.i - br i1 %156, label %161, label %157 - -157: ; preds = %.critedge.i - %158 = getelementptr inbounds nuw i16, ptr %86, i64 %.lcssa17.i - %159 = load i16, ptr %158, align 2, !tbaa !168 - %160 = icmp ult i16 %159, %.sroa.0.0.lcssa.i - br i1 %160, label %161, label %_ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_.exit - -161: ; preds = %157, %.critedge.i - %162 = and i32 %.sroa.0.0.in.lcssa.i, 65535 - %163 = zext i8 %.sroa.8.0.lcssa.i to i32 - tail call void (ptr, ...) @_ZN8rawspeed14ThrowExceptionINS_19RawDecoderExceptionEEEvPKcz(ptr noundef nonnull @.str.29, ptr noundef nonnull @__PRETTY_FUNCTION__._ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_, i32 noundef %162, i32 noundef %163) #14 + %146 = icmp ne i32 %.sroa.16.6, 0 + tail call void @llvm.assume(i1 %146) + %148 = lshr i64 %.sroa.16.6, 63 + %149 = trunc nuw nsw i64 %147 to i32 + %150 = add nsw i32 %.sroa.16.6, -1 + %151 = shl i64 %139, 1 + %152 = shl nsw i32 %140, 1 + %153 = and i32 %152, 131070 + %.sroa.0.0.i = or disjoint i32 %152, %148 + %154 = add i8 %.sroa.8.020.i, 1 + %155 = trunc i32 %153 to i16 + %155 = zext i8 %154 to i64 + %156 = icmp ugt i64 %85, %155 + br i1 %156, label %.lr.ph.i, label %.critedge.i, !llvm.loop !170 + +.critedge.i: ; preds = %.critedge2.i, %.lr.ph.i, %135 + %.sroa.0.0.in.lcssa.i = phi i64 [ %138, %135 ], [ %.sroa.16.6, %.lr.ph.i ], [ %150, %.critedge2.i ] + %.sroa.8.0.lcssa.i = phi i32 [ %137, %135 ], [ %.sroa.16.6, %.lr.ph.i ], [ %150, %.critedge2.i ] + %.sroa.0.0.lcssa.i = phi i32 [ %.sroa.0.018.i, %135 ], [ %.sroa.0.0.in19.i, %.lr.ph.i ], [ %.sroa.0.0.i, %.critedge2.i ] + %.lcssa17.i = phi i8 [ 11, %135 ], [ %140, %.lr.ph.i ], [ %154, %.critedge2.i ] + %156 = phi i16 [ %.sroa.0.018.i, %135 ], [ %.sroa.0.021.i, %.lr.ph.i ], [ %155, %.critedge2.i ] + %.lcssa17.i = phi i64 [ 11, %135 ], [ %140, %.lr.ph.i ], [ %155, %.critedge2.i ] + %157 = icmp ult i64 %85, %.lcssa17.i + br i1 %157, label %162, label %158 + +158:; preds = %.critedge.i + %160 = getelementptr inbounds nuw i16, ptr %87, i64 %.lcssa17.i + %160 = load i16, ptr %159, align 2, !tbaa !168 + %161 = icmp ult i16 %160, %.sroa.0.0.lcssa.i + br i1 %161, label %159, label %_ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_.exit + +162:; preds = %158, %.critedge.i + %160 = and i32 %156, 65535 + %164 = zext i8 %.sroa.8.0.lcssa.i to i32 + tail call void (ptr, ...) @_ZN8rawspeed14ThrowExceptionINS_19RawDecoderExceptionEEEvPKcz(ptr noundef nonnull @.str.29, ptr noundef nonnull @__PRETTY_FUNCTION__._ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_, i32 noundef %163, i32 noundef %164) #14 unreachable -_ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_.exit: ; preds = %157 +_ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_.exit: ; preds = %158 %.sroa.0.0.mask.i = and i32 %.sroa.0.0.in.lcssa.i, 65535 - %164 = getelementptr inbounds nuw i16, ptr %87, i64 %.lcssa17.i + %164 = getelementptr inbounds nuw i16, ptr %88, i64 %.lcssa17.i %165 = load i16, ptr %164, align 2, !tbaa !168 %.tr.i = zext i16 %165 to i32 %.narrow.i = sub nsw i32 %.sroa.0.0.mask.i, %.tr.i %166 = zext i32 %.narrow.i to i64 - %167 = getelementptr inbounds nuw i8, ptr %88, i64 %166 + %167 = getelementptr inbounds nuw i8, ptr %89, i64 %166 %168 = load i8, ptr %167, align 1, !tbaa !131 br label %169 169: ; preds = %_ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_.exit, %130 - %.0202 = phi i8 [ %168, %_ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_.exit ], [ %132, %130 ] - %.sroa.0.sroa.6.0 = phi i8 [ %.sroa.8.0.lcssa.i, %_ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_.exit ], [ %131, %130 ] + %.0202 = phi i8 [ %168, %_ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_.exit ], [ %133, %130 ] + %.sroa.0.sroa.6.0 = phi i8 [ %.sroa.8.0.lcssa.i, %_ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_.exit ], [ %132, %130 ] %.sroa.037.2 = phi i64 [ %.sroa.037.5, %_ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_.exit ], [ %127, %130 ] %.sroa.16.2 = phi i32 [ %.sroa.16.5, %_ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPartialSymbolINS_14BitStreamerMSBEEESt4pairINS_18AbstractPrefixCodeIS1_E10CodeSymbolEiERT_S8_.exit ], [ %125, %130 ] %170 = icmp ult i8 %.sroa.0.sroa.6.0, 17 - tail call void @llvm.assume(i1 %170) - %171 = icmp ult i8 %.0202, 17 tail call void @llvm.assume(i1 %171) - switch i8 %.0202, label %177 [ - i8 16, label %172 + %172 = icmp ult i8 %.0202, 17 + tail call void @llvm.assume(i1 %172) + switch i8 %.0202, label %178 [ + i8 16, label %173 i8 0, label %_ZNK8rawspeed20PrefixCodeLUTDecoderINS_15BaselineCodeTagENS_23PrefixCodeLookupDecoderIS1_EEE6decodeINS_14BitStreamerMSBELb1EEEiRT_.exit ] 172: ; preds = %169 - br i1 %90, label %173, label %_ZNK8rawspeed20PrefixCodeLUTDecoderINS_15BaselineCodeTagENS_23PrefixCodeLookupDecoderIS1_EEE6decodeINS_14BitStreamerMSBELb1EEEiRT_.exit + br i1 %91, label %173, label %_ZNK8rawspeed20PrefixCodeLUTDecoderINS_15BaselineCodeTagENS_23PrefixCodeLookupDecoderIS1_EEE6decodeINS_14BitStreamerMSBELb1EEEiRT_.exit 173: ; preds = %172 %174 = icmp samesign ugt i32 %.sroa.16.2, 15 @@ -2194,7 +2195,7 @@ _ZNK8rawspeed23PrefixCodeLookupDecoderINS_15BaselineCodeTagEE26finishReadingPart _ZNK8rawspeed20PrefixCodeLUTDecoderINS_15BaselineCodeTagENS_23PrefixCodeLookupDecoderIS1_EEE6decodeINS_14BitStreamerMSBELb1EEEiRT_.exit: ; preds = %177, %173, %172, %169, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit %.sroa.037.3 = phi i64 [ %127, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit ], [ %186, %177 ], [ %.sroa.037.2, %169 ], [ %176, %173 ], [ %.sroa.037.2, %172 ] - %.sroa.16.3 = phi i32 [ %125, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit ], [ %184, %177 ], [ %.sroa.16.2, %169 ], [ %175, %173 ], [ %.sroa.16.2, %172 ] + %.sroa.16.3 = phi i32 [ %125, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit ], [ %185, %178 ], [ %.sroa.16.2, %170 ], [ %176, %174 ], [ %.sroa.16.2, %173 ] %.0.i = phi i32 [ %122, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit ], [ %.0.i.i, %177 ], [ 0, %169 ], [ -32768, %173 ], [ -32768, %172 ] %189 = and i64 %indvars.iv, 1 %190 = getelementptr inbounds nuw [2 x i32], ptr %3, i64 0, i64 %189 @@ -2212,8 +2213,8 @@ _ZNK8rawspeed20PrefixCodeLUTDecoderINS_15BaselineCodeTagENS_23PrefixCodeLookupDe 197: ; preds = %_ZNK8rawspeed20PrefixCodeLUTDecoderINS_15BaselineCodeTagENS_23PrefixCodeLookupDecoderIS1_EEE6decodeINS_14BitStreamerMSBELb1EEEiRT_.exit %198 = trunc nuw i32 %192 to i16 - tail call void @llvm.assume(i1 %93) - %199 = getelementptr inbounds nuw i16, ptr %94, i64 %indvars.iv + tail call void @llvm.assume(i1 %94) + %199 = getelementptr inbounds nuw i16, ptr %95, i64 %indvars.iv store i16 %198, ptr %199, align 2, !tbaa !168 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %56 diff --git a/bench/darktable/optimized/RafDecoder.ll b/bench/darktable/optimized/RafDecoder.ll index cb82c5f6599..fca2f7e62a3 100644 --- a/bench/darktable/optimized/RafDecoder.ll +++ b/bench/darktable/optimized/RafDecoder.ll @@ -2600,7 +2600,7 @@ _ZN8rawspeed8RawImage6createERKNS_8iPoint2DENS_12RawImageTypeEj.exit: ; preds = %127 = getelementptr inbounds nuw i8, ptr %115, i64 288 %128 = getelementptr inbounds nuw i8, ptr %124, i64 272 %129 = invoke noundef nonnull align 8 dereferenceable(24) ptr @_ZNSt6vectorIN8rawspeed12NotARationalIiEESaIS2_EEaSERKS4_(ptr noundef nonnull align 8 dereferenceable(24) %127, ptr noundef nonnull align 8 dereferenceable(24) %128) - to label %.noexc108 unwind label %344 + to label %.noexc108 unwind label %346 .noexc108: ; preds = %123 %130 = getelementptr inbounds nuw i8, ptr %115, i64 312 @@ -2609,43 +2609,43 @@ _ZN8rawspeed8RawImage6createERKNS_8iPoint2DENS_12RawImageTypeEj.exit: ; preds = %132 = getelementptr inbounds nuw i8, ptr %115, i64 328 %133 = getelementptr inbounds nuw i8, ptr %124, i64 312 invoke void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_(ptr noundef nonnull align 8 dereferenceable(32) %132, ptr noundef nonnull align 8 dereferenceable(32) %133) - to label %.noexc109 unwind label %344 + to label %.noexc109 unwind label %346 .noexc109: ; preds = %.noexc108 %134 = getelementptr inbounds nuw i8, ptr %115, i64 360 %135 = getelementptr inbounds nuw i8, ptr %124, i64 344 invoke void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_(ptr noundef nonnull align 8 dereferenceable(32) %134, ptr noundef nonnull align 8 dereferenceable(32) %135) - to label %.noexc110 unwind label %344 + to label %.noexc110 unwind label %346 .noexc110: ; preds = %.noexc109 %136 = getelementptr inbounds nuw i8, ptr %115, i64 392 %137 = getelementptr inbounds nuw i8, ptr %124, i64 376 invoke void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_(ptr noundef nonnull align 8 dereferenceable(32) %136, ptr noundef nonnull align 8 dereferenceable(32) %137) - to label %.noexc111 unwind label %344 + to label %.noexc111 unwind label %346 .noexc111: ; preds = %.noexc110 %138 = getelementptr inbounds nuw i8, ptr %115, i64 424 %139 = getelementptr inbounds nuw i8, ptr %124, i64 408 invoke void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_(ptr noundef nonnull align 8 dereferenceable(32) %138, ptr noundef nonnull align 8 dereferenceable(32) %139) - to label %.noexc112 unwind label %344 + to label %.noexc112 unwind label %346 .noexc112: ; preds = %.noexc111 %140 = getelementptr inbounds nuw i8, ptr %115, i64 456 %141 = getelementptr inbounds nuw i8, ptr %124, i64 440 invoke void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_(ptr noundef nonnull align 8 dereferenceable(32) %140, ptr noundef nonnull align 8 dereferenceable(32) %141) - to label %.noexc113 unwind label %344 + to label %.noexc113 unwind label %346 .noexc113: ; preds = %.noexc112 %142 = getelementptr inbounds nuw i8, ptr %115, i64 488 %143 = getelementptr inbounds nuw i8, ptr %124, i64 472 invoke void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_(ptr noundef nonnull align 8 dereferenceable(32) %142, ptr noundef nonnull align 8 dereferenceable(32) %143) - to label %.noexc114 unwind label %344 + to label %.noexc114 unwind label %346 .noexc114: ; preds = %.noexc113 %144 = getelementptr inbounds nuw i8, ptr %115, i64 520 %145 = getelementptr inbounds nuw i8, ptr %124, i64 504 invoke void @_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_(ptr noundef nonnull align 8 dereferenceable(32) %144, ptr noundef nonnull align 8 dereferenceable(32) %145) - to label %146 unwind label %344 + to label %146 unwind label %346 146: ; preds = %.noexc114 %147 = getelementptr inbounds nuw i8, ptr %124, i64 536 @@ -2731,7 +2731,10 @@ _ZN8rawspeed8RawImage6createERKNS_8iPoint2DENS_12RawImageTypeEj.exit: ; preds = %203 = zext i32 %.sroa.0165.0 to i64 %204 = zext nneg i32 %157 to i64 %205 = sext i32 %.sroa.8.0 to i64 - %206 = zext nneg i32 %162 to i64 + %206 = zext nneg i32 %159 to i64 + %207 = zext nneg i32 %162 to i64 + %208 = zext nneg i32 %157 to i64 + %209 = zext nneg i32 %163 to i64 %wide.trip.count217 = zext nneg i32 %.sroa.12.0 to i64 %wide.trip.count = zext nneg i32 %.sroa.0169.0 to i64 br label %.preheader.us @@ -2740,7 +2743,10 @@ _ZN8rawspeed8RawImage6createERKNS_8iPoint2DENS_12RawImageTypeEj.exit: ; preds = %207 = zext i32 %.sroa.0165.0 to i64 %208 = zext nneg i32 %157 to i64 %209 = sext i32 %.sroa.8.0 to i64 - %210 = zext nneg i32 %162 to i64 + %210 = zext nneg i32 %159 to i64 + %214 = zext nneg i32 %162 to i64 + %215 = zext nneg i32 %157 to i64 + %216 = zext nneg i32 %163 to i64 %wide.trip.count227 = zext nneg i32 %.sroa.12.0 to i64 %wide.trip.count222 = zext nneg i32 %.sroa.0169.0 to i64 br label %.preheader.us.us @@ -2751,28 +2757,26 @@ _ZN8rawspeed8RawImage6createERKNS_8iPoint2DENS_12RawImageTypeEj.exit: ; preds = %.neg200.reass.reass.us.us = add i32 %invariant.op206, %211 %212 = add nuw nsw i64 %indvars.iv224, %209 %213 = icmp sgt i64 %212, -1 - %214 = trunc nsw i64 %212 to i32 - %215 = icmp samesign ugt i32 %159, %214 - %216 = mul nuw nsw i64 %212, %210 - %217 = trunc i64 %216 to i32 - %218 = add i32 %157, %217 - %219 = icmp samesign ule i32 %218, %163 - %220 = getelementptr inbounds nuw i16, ptr %152, i64 %216 - br label %221 - -221: ; preds = %232, %.preheader.us.us - %indvars.iv219 = phi i64 [ %indvars.iv.next220, %232 ], [ 0, %.preheader.us.us ] - %222 = trunc nuw nsw i64 %indvars.iv219 to i32 - %223 = lshr i32 %222, 1 - %224 = sub i32 %.neg200.reass.reass.us.us, %223 + %214 = icmp samesign ult i64 %212, %213 + %215 = mul nuw nsw i64 %218, %214 + %216 = add nuw nsw i64 %221, %215 + %217 = icmp samesign ule i64 %216, %216 + %218 = getelementptr inbounds nuw i16, ptr %152, i64 %221 + br label %225 + +225:; preds = %232, %.preheader.us.us + %indvars.iv219 = phi i64 [ %indvars.iv.next220, %236 ], [ 0, %.preheader.us.us ] + %226 = trunc nuw nsw i64 %indvars.iv219 to i32 + %indvars.iv219 = lshr i32 %226, 1 + %222 = sub i32 %.neg200.reass.reass.us.us, %indvars.iv219 %indvars.iv.next220 = add nuw nsw i64 %indvars.iv219, 1 %225 = trunc nuw nsw i64 %indvars.iv.next220 to i32 %226 = lshr i32 %225, 1 %227 = add nuw nsw i32 %226, %211 - %228 = icmp slt i32 %224, %200 + %228 = icmp slt i32 %228, %200 br i1 %228, label %229, label %.split.us -229: ; preds = %221 +229: ; preds = %225 %230 = load i32, ptr %196, align 8, !tbaa !111 %231 = icmp slt i32 %227, %230 br i1 %231, label %232, label %.split.us @@ -2781,31 +2785,31 @@ _ZN8rawspeed8RawImage6createERKNS_8iPoint2DENS_12RawImageTypeEj.exit: ; preds = %233 = add nuw nsw i64 %indvars.iv219, %207 %234 = and i64 %233, 2147483648 %235 = icmp eq i64 %234, 0 - call void @llvm.assume(i1 %235) - %236 = icmp samesign ult i64 %233, %208 - call void @llvm.assume(i1 %236) - call void @llvm.assume(i1 %213) - call void @llvm.assume(i1 %215) - call void @llvm.assume(i1 %219) - %237 = getelementptr inbounds nuw i16, ptr %220, i64 %233 - %238 = load i16, ptr %237, align 2, !tbaa !178 - %239 = icmp samesign ult i32 %227, %178 call void @llvm.assume(i1 %239) - %240 = icmp sgt i32 %224, -1 + %240 = icmp samesign ult i64 %237, %211 call void @llvm.assume(i1 %240) - %241 = icmp samesign ult i32 %224, %180 - call void @llvm.assume(i1 %241) - %242 = mul nuw nsw i32 %224, %183 - %243 = add nuw nsw i32 %242, %178 - %244 = icmp samesign ule i32 %243, %184 + call void @llvm.assume(i1 %219) + call void @llvm.assume(i1 %214) + call void @llvm.assume(i1 %223) + %243 = getelementptr inbounds nuw i16, ptr %224, i64 %237 + %244 = load i16, ptr %243, align 2, !tbaa !178 + %243 = icmp samesign ult i32 %231, %178 + call void @llvm.assume(i1 %243) + %244 = icmp sgt i32 %228, -1 call void @llvm.assume(i1 %244) - %245 = zext nneg i32 %242 to i64 - %246 = getelementptr inbounds nuw i16, ptr %173, i64 %245 - %247 = zext nneg i32 %227 to i64 - %248 = getelementptr inbounds nuw i16, ptr %246, i64 %247 - store i16 %238, ptr %248, align 2, !tbaa !178 + %245 = icmp samesign ult i32 %228, %180 + call void @llvm.assume(i1 %245) + %247 = mul nuw nsw i32 %228, %183 + %248 = add nuw nsw i32 %246, %178 + %248 = icmp samesign ule i32 %247, %184 + call void @llvm.assume(i1 %248) + %249 = zext nneg i32 %246 to i64 + %250 = getelementptr inbounds nuw i16, ptr %173, i64 %249 + %251 = zext nneg i32 %231 to i64 + %252 = getelementptr inbounds nuw i16, ptr %250, i64 %251 + store i16 %242, ptr %252, align 2, !tbaa !178 %exitcond223.not = icmp eq i64 %indvars.iv.next220, %wide.trip.count222 - br i1 %exitcond223.not, label %._crit_edge.split.us.us.us, label %221, !llvm.loop !180 + br i1 %exitcond223.not, label %._crit_edge.split.us.us.us, label %225, !llvm.loop !180 ._crit_edge.split.us.us.us: ; preds = %232 %indvars.iv.next225 = add nuw nsw i64 %indvars.iv224, 1 @@ -2822,62 +2826,60 @@ _ZN8rawspeed8RawImage6createERKNS_8iPoint2DENS_12RawImageTypeEj.exit: ; preds = %253 = lshr i64 %indvars.iv.next215, 1 %254 = add nuw nsw i64 %indvars.iv214, %205 %255 = icmp sgt i64 %254, -1 - %256 = trunc nsw i64 %254 to i32 - %257 = icmp samesign ugt i32 %159, %256 - %258 = mul nuw nsw i64 %254, %206 - %259 = trunc i64 %258 to i32 - %260 = add i32 %157, %259 - %261 = icmp samesign ule i32 %260, %163 - %262 = getelementptr inbounds nuw i16, ptr %152, i64 %258 - %263 = and i64 %253, 2147483647 - %264 = icmp slt i32 %251, %200 - br label %265 - -265: ; preds = %.preheader.us, %274 - %indvars.iv = phi i64 [ 0, %.preheader.us ], [ %indvars.iv.next, %274 ] - %266 = trunc nuw nsw i64 %indvars.iv to i32 - %267 = xor i32 %266, -1 - %268 = add i32 %252, %267 - %269 = add nuw nsw i64 %indvars.iv, %263 - br i1 %264, label %270, label %.split.us - -270: ; preds = %265 - %271 = load i32, ptr %196, align 8, !tbaa !111 - %272 = trunc nuw i64 %269 to i32 - %273 = icmp sgt i32 %271, %272 - br i1 %273, label %274, label %.split.us - -274: ; preds = %270 - %275 = add nuw nsw i64 %indvars.iv, %203 - %276 = and i64 %275, 2147483648 - %277 = icmp eq i64 %276, 0 + %256 = icmp samesign ult i64 %254, %206 + %257 = mul nuw nsw i64 %258, %207 + %258 = add nuw nsw i64 %257, %208 + %259 = icmp samesign ule i64 %258, %209 + %260 = getelementptr inbounds nuw i16, ptr %152, i64 %257 + %261 = and i64 %257, 2147483647 + %262 = icmp slt i32 %255, %200 + br label %267 + +267:; preds = %.preheader.us, %276 + %indvars.iv = phi i64 [ 0, %.preheader.us ], [ %indvars.iv.next, %276 ] + %268 = trunc nuw nsw i64 %indvars.iv to i32 + %indvars.iv = xor i32 %268, -1 + %266 = add i32 %256, %indvars.iv + %267 = add nuw nsw i64 %indvars.iv, %261 + br i1 %266, label %272, label %.split.us + +272:; preds = %267 + %273 = load i32, ptr %196, align 8, !tbaa !111 + %274 = trunc nuw i64 %267 to i32 + %271 = icmp sgt i32 %273, %274 + br i1 %275, label %276, label %.split.us + +276:; preds = %272 + %277 = add nuw nsw i64 %indvars.iv, %203 + %278 = and i64 %277, 2147483648 + %275 = icmp eq i64 %278, 0 + call void @llvm.assume(i1 %275) + %277 = icmp samesign ult i64 %277, %204 call void @llvm.assume(i1 %277) - %278 = icmp samesign ult i64 %275, %204 - call void @llvm.assume(i1 %278) + call void @llvm.assume(i1 %259) + call void @llvm.assume(i1 %260) call void @llvm.assume(i1 %255) - call void @llvm.assume(i1 %257) - call void @llvm.assume(i1 %261) - %279 = getelementptr inbounds nuw i16, ptr %262, i64 %275 - %280 = load i16, ptr %279, align 2, !tbaa !178 - %281 = icmp samesign ult i64 %269, %202 - call void @llvm.assume(i1 %281) - %282 = icmp sgt i32 %268, -1 - call void @llvm.assume(i1 %282) - %283 = icmp samesign ult i32 %268, %180 + %281 = getelementptr inbounds nuw i16, ptr %264, i64 %277 + %282 = load i16, ptr %281, align 2, !tbaa !178 + %279 = icmp samesign ult i64 %271, %202 call void @llvm.assume(i1 %283) - %284 = mul nuw nsw i32 %268, %183 - %285 = add nuw nsw i32 %284, %178 - %286 = icmp samesign ule i32 %285, %184 + %284 = icmp sgt i32 %266, -1 + call void @llvm.assume(i1 %284) + %286 = icmp samesign ult i32 %270, %180 call void @llvm.assume(i1 %286) - %287 = zext nneg i32 %284 to i64 - %288 = getelementptr inbounds nuw i16, ptr %173, i64 %287 - %289 = getelementptr inbounds nuw i16, ptr %288, i64 %269 - store i16 %280, ptr %289, align 2, !tbaa !178 + %287 = mul nuw nsw i32 %270, %183 + %288 = add nuw nsw i32 %286, %178 + %289 = icmp samesign ule i32 %288, %184 + call void @llvm.assume(i1 %288) + %289 = zext nneg i32 %286 to i64 + %290 = getelementptr inbounds nuw i16, ptr %173, i64 %289 + %291 = getelementptr inbounds nuw i16, ptr %290, i64 %271 + store i16 %282, ptr %291, align 2, !tbaa !178 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count - br i1 %exitcond.not, label %._crit_edge.split.us208, label %265, !llvm.loop !180 + br i1 %exitcond.not, label %._crit_edge.split.us208, label %267, !llvm.loop !180 -._crit_edge.split.us208: ; preds = %274 +._crit_edge.split.us208: ; preds = %276 %exitcond218.not = icmp eq i64 %indvars.iv.next215, %wide.trip.count217 br i1 %exitcond218.not, label %._crit_edge205, label %.preheader.us, !llvm.loop !181 @@ -2923,11 +2925,11 @@ _ZNSt16_Sp_counted_baseILN9__gnu_cxx12_Lock_policyE2EE15_M_add_ref_copyEv.exit.i %307 = load ptr, ptr %299, align 8, !tbaa !70 %308 = getelementptr inbounds nuw i8, ptr %307, i64 16 %309 = load ptr, ptr %308, align 8 - call void %309(ptr noundef nonnull align 8 dereferenceable(16) %299) #27 + call void %311(ptr noundef nonnull align 8 dereferenceable(16) %299) #27 %310 = load ptr, ptr %299, align 8, !tbaa !70 %311 = getelementptr inbounds nuw i8, ptr %310, i64 24 %312 = load ptr, ptr %311, align 8 - call void %312(ptr noundef nonnull align 8 dereferenceable(16) %299) #27 + call void %314(ptr noundef nonnull align 8 dereferenceable(16) %299) #27 br label %_ZN8rawspeed8RawImageaSERKS0_.exit 313: ; preds = %300 @@ -2974,11 +2976,11 @@ _ZN8rawspeed8RawImageaSERKS0_.exit.thread: ; preds = %._crit_edge205, %_Z %328 = load ptr, ptr %321, align 8, !tbaa !70 %329 = getelementptr inbounds nuw i8, ptr %328, i64 16 %330 = load ptr, ptr %329, align 8 - call void %330(ptr noundef nonnull align 8 dereferenceable(16) %321) #27 + call void %332(ptr noundef nonnull align 8 dereferenceable(16) %321) #27 %331 = load ptr, ptr %321, align 8, !tbaa !70 %332 = getelementptr inbounds nuw i8, ptr %331, i64 24 %333 = load ptr, ptr %332, align 8 - call void %333(ptr noundef nonnull align 8 dereferenceable(16) %321) #27 + call void %335(ptr noundef nonnull align 8 dereferenceable(16) %321) #27 br label %_ZN8rawspeed8RawImageD2Ev.exit 334: ; preds = %_ZN8rawspeed8RawImageaSERKS0_.exit.thread @@ -3010,29 +3012,29 @@ _ZN8rawspeed8RawImageD2Ev.exit: ; preds = %_ZN8rawspeed8RawIma br label %355 342: ; preds = %_ZN8rawspeed8RawImage6createERKNS_8iPoint2DENS_12RawImageTypeEj.exit - %343 = landingpad { ptr, i32 } - cleanup - br label %349 - -344: ; preds = %.noexc114, %.noexc113, %.noexc112, %.noexc111, %.noexc110, %.noexc109, %.noexc108, %123 %345 = landingpad { ptr, i32 } cleanup br label %349 -346: ; preds = %.split.us +346: ; preds = %.noexc114, %.noexc113, %.noexc112, %.noexc111, %.noexc110, %.noexc109, %.noexc108, %123 %347 = landingpad { ptr, i32 } cleanup br label %349 -.split.us: ; preds = %270, %265, %221, %229 +348: ; preds = %.split.us + %349 = landingpad { ptr, i32 } + cleanup + br label %351 + +.split.us: ; preds = %272, %267, %225, %233 invoke void (ptr, ...) @_ZN8rawspeed14ThrowExceptionINS_19RawDecoderExceptionEEEvPKcz(ptr noundef nonnull @.str.12, ptr noundef nonnull @__PRETTY_FUNCTION__._ZN8rawspeed10RafDecoder16applyCorrectionsEPKNS_6CameraE) #13 - to label %348 unwind label %346 + to label %348 unwind label %348 348: ; preds = %.split.us unreachable -349: ; preds = %346, %344, %342 - %.pn50 = phi { ptr, i32 } [ %347, %346 ], [ %345, %344 ], [ %343, %342 ] +349: ; preds = %348, %346, %344 + %.pn50 = phi { ptr, i32 } [ %349, %348 ], [ %347, %346 ], [ %345, %344 ] call void @_ZN8rawspeed8RawImageD2Ev(ptr noundef nonnull align 8 dereferenceable(16) %5) #27 call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %5) #27 call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %4) #27 diff --git a/bench/darktable/optimized/SamsungV0Decompressor.ll b/bench/darktable/optimized/SamsungV0Decompressor.ll index bef13422050..2b6fe09ce24 100644 --- a/bench/darktable/optimized/SamsungV0Decompressor.ll +++ b/bench/darktable/optimized/SamsungV0Decompressor.ll @@ -1003,7 +1003,8 @@ _ZN8rawspeed16BitStreamerMSB32CI2NS_11BitStreamerIS0_NS_39BitStreamerForwardSequ %68 = icmp sgt i32 %1, -1 %invariant.gep = getelementptr i8, ptr %60, i64 -2 %69 = zext nneg i32 %13 to i64 - br label %72 + %70 = zext nneg i32 %13 to i64 + br label %73 70: ; preds = %_ZN8rawspeed16BitStreamerMSB32CI2NS_11BitStreamerIS0_NS_39BitStreamerForwardSequentialReplenisherIS0_EEEEENS_10Array1DRefIKSt4byteEE.exit, %70 %.0.idx508 = phi i64 [ 0, %_ZN8rawspeed16BitStreamerMSB32CI2NS_11BitStreamerIS0_NS_39BitStreamerForwardSequentialReplenisherIS0_EEEEENS_10Array1DRefIKSt4byteEE.exit ], [ %.0.add, %70 ] @@ -1135,12 +1136,12 @@ _ZN8rawspeed11BitStreamerINS_16BitStreamerMSB32ENS_39BitStreamerForwardSequentia 109: ; preds = %.preheader485 %110 = icmp samesign ult i32 %.sroa.24.2513, 65 - tail call void @llvm.assume(i1 %110) - %111 = icmp sgt i32 %.sroa.59215.1514, -1 tail call void @llvm.assume(i1 %111) - %112 = and i32 %.sroa.59215.1514, 3 - %113 = icmp eq i32 %112, 0 - tail call void @llvm.assume(i1 %113) + %112 = icmp sgt i32 %.sroa.59215.1514, -1 + tail call void @llvm.assume(i1 %112) + %113 = and i32 %.sroa.59215.1514, 3 + %114 = icmp eq i32 %113, 0 + tail call void @llvm.assume(i1 %114) %.not.i.i120 = icmp samesign ult i32 %.sroa.24.2513, 4 br i1 %.not.i.i120, label %114, label %.thread567 @@ -1261,16 +1262,16 @@ _ZN8rawspeed39BitStreamerForwardSequentialReplenisherINS_16BitStreamerMSB32EE8ge 165: ; preds = %.preheader484 %166 = icmp samesign ult i32 %.sroa.24.4517, 65 - tail call void @llvm.assume(i1 %166) - %167 = icmp sgt i32 %.sroa.59215.3518, -1 tail call void @llvm.assume(i1 %167) - %168 = and i32 %.sroa.59215.3518, 3 - %169 = icmp eq i32 %168, 0 - tail call void @llvm.assume(i1 %169) - %170 = icmp samesign ult i32 %164, 33 + %168 = icmp sgt i32 %.sroa.59215.3518, -1 + tail call void @llvm.assume(i1 %168) + %169 = and i32 %.sroa.59215.3518, 3 + %170 = icmp eq i32 %169, 0 tail call void @llvm.assume(i1 %170) - %.not.i.i.i122 = icmp samesign ult i32 %.sroa.24.4517, %164 - br i1 %.not.i.i.i122, label %171, label %_ZN8rawspeed11BitStreamerINS_16BitStreamerMSB32ENS_39BitStreamerForwardSequentialReplenisherIS1_EEE7getBitsEi.exit.i + %.not.i.i.i122 = icmp samesign ult i32 %164, 33 + tail call void @llvm.assume(i1 %.not.i.i.i122) + %.not.i.i.i122 = icmp samesign ult i32 %.sroa.24.4517, %165 + br i1 %.not.i.i.i122, label %172, label %_ZN8rawspeed11BitStreamerINS_16BitStreamerMSB32ENS_39BitStreamerForwardSequentialReplenisherIS1_EEE7getBitsEi.exit.i 171: ; preds = %165 call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %.sroa.0.i.i.i.i) @@ -1366,16 +1367,16 @@ _ZN8rawspeed21SamsungV0Decompressor7calcAdjERNS_16BitStreamerMSB32Ei.exit: ; pre 213: ; preds = %.preheader %214 = icmp samesign ult i32 %.sroa.24.5521, 65 - tail call void @llvm.assume(i1 %214) - %215 = icmp sgt i32 %.sroa.59215.4522, -1 tail call void @llvm.assume(i1 %215) - %216 = and i32 %.sroa.59215.4522, 3 - %217 = icmp eq i32 %216, 0 - tail call void @llvm.assume(i1 %217) - %218 = icmp samesign ult i32 %212, 33 + %216 = icmp sgt i32 %.sroa.59215.4522, -1 + tail call void @llvm.assume(i1 %216) + %217 = and i32 %.sroa.59215.4522, 3 + %218 = icmp eq i32 %217, 0 tail call void @llvm.assume(i1 %218) - %.not.i.i.i127 = icmp samesign ult i32 %.sroa.24.5521, %212 - br i1 %.not.i.i.i127, label %219, label %_ZN8rawspeed11BitStreamerINS_16BitStreamerMSB32ENS_39BitStreamerForwardSequentialReplenisherIS1_EEE7getBitsEi.exit.i130 + %.not.i.i.i127 = icmp samesign ult i32 %213, 33 + tail call void @llvm.assume(i1 %.not.i.i.i127) + %.not.i.i.i127 = icmp samesign ult i32 %.sroa.24.5521, %213 + br i1 %.not.i.i.i127, label %220, label %_ZN8rawspeed11BitStreamerINS_16BitStreamerMSB32ENS_39BitStreamerForwardSequentialReplenisherIS1_EEE7getBitsEi.exit.i130 219: ; preds = %213 call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %.sroa.0.i.i.i.i125) @@ -1460,8 +1461,7 @@ _ZN8rawspeed21SamsungV0Decompressor7calcAdjERNS_16BitStreamerMSB32Ei.exit138: ; 257: ; preds = %256 %258 = add nsw i64 %indvars.iv557, -2 - %259 = trunc nsw i64 %258 to i32 - %260 = icmp samesign ugt i32 %13, %259 + %259 = icmp samesign ult i64 %258, %70 tail call void @llvm.assume(i1 %260) tail call void @llvm.assume(i1 %68) tail call void @llvm.assume(i1 %55) diff --git a/bench/darktable/optimized/SamsungV1Decompressor.ll b/bench/darktable/optimized/SamsungV1Decompressor.ll index 11b4bed424b..122d96530a7 100644 --- a/bench/darktable/optimized/SamsungV1Decompressor.ll +++ b/bench/darktable/optimized/SamsungV1Decompressor.ll @@ -384,13 +384,16 @@ _ZN8rawspeed14BitStreamerMSBCI2NS_11BitStreamerIS0_NS_39BitStreamerForwardSequen .lr.ph217: ; preds = %_ZN8rawspeed14BitStreamerMSBCI2NS_11BitStreamerIS0_NS_39BitStreamerForwardSequentialReplenisherIS0_EEEEENS_10Array1DRefIKSt4byteEE.exit.preheader %.sroa.5.0..sroa_idx = getelementptr inbounds nuw i8, ptr %1, i64 4 %57 = add nuw nsw i32 %38, 8 - %58 = zext nneg i32 %17 to i64 + %58 = zext nneg i32 %14 to i64 + %59 = zext nneg i32 %17 to i64 br i1 %24, label %.lr.ph217.split.preheader, label %.lr.ph217.split.us.preheader .lr.ph217.split.us.preheader: ; preds = %.lr.ph217 - %59 = zext nneg i32 %17 to i64 %60 = zext nneg i32 %12 to i64 %61 = zext nneg i32 %18 to i64 + %62 = zext nneg i32 %17 to i64 + %63 = zext nneg i32 %12 to i64 + %64 = zext nneg i32 %18 to i64 %wide.trip.count232 = zext nneg i32 %14 to i64 br label %.lr.ph217.split.us @@ -410,28 +413,26 @@ _ZN8rawspeed14BitStreamerMSBCI2NS_11BitStreamerIS0_NS_39BitStreamerForwardSequen 63: ; preds = %.lr.ph217.split.us %64 = add nsw i64 %indvars.iv229, -2 - %65 = trunc nsw i64 %64 to i32 - %66 = icmp samesign ugt i32 %14, %65 - tail call void @llvm.assume(i1 %66) - %67 = mul nuw nsw i64 %64, %58 - %68 = trunc i64 %67 to i32 - %69 = add i32 %12, %68 - %70 = icmp samesign ule i32 %69, %18 - tail call void @llvm.assume(i1 %70) - %71 = getelementptr inbounds nuw i16, ptr %7, i64 %67 - %72 = load i16, ptr %71, align 2, !tbaa !116 - %73 = getelementptr inbounds nuw i8, ptr %71, i64 2 - %74 = zext i16 %72 to i32 - %75 = load i16, ptr %73, align 2, !tbaa !116 - %76 = zext i16 %75 to i32 - store i32 %74, ptr %1, align 8 - store i32 %76, ptr %.sroa.5.0..sroa_idx, align 4, !tbaa !15 + %65 = icmp samesign ult i64 %64, %58 + tail call void @llvm.assume(i1 %65) + %69 = mul nuw nsw i64 %67, %59 + %67 = add nuw nsw i64 %69, %60 + %68 = icmp samesign ule i64 %67, %61 + tail call void @llvm.assume(i1 %68) + %70 = getelementptr inbounds nuw i16, ptr %7, i64 %69 + %73 = load i16, ptr %70, align 2, !tbaa !116 + %71 = getelementptr inbounds nuw i8, ptr %72, i64 2 + %72 = zext i16 %73 to i32 + %73 = load i16, ptr %71, align 2, !tbaa !116 + %74 = zext i16 %76 to i32 + store i32 %72, ptr %1, align 8 + store i32 %77, ptr %.sroa.5.0..sroa_idx, align 4, !tbaa !15 br label %.lr.ph208.us .lr.ph208.us: ; preds = %63, %.lr.ph217.split.us - %77 = mul nuw nsw i64 %indvars.iv229, %59 - %78 = add nuw nsw i64 %77, %60 - %79 = icmp samesign ule i64 %78, %61 + %77 = mul nuw nsw i64 %indvars.iv229, %62 + %78 = add nuw nsw i64 %77, %63 + %79 = icmp samesign ule i64 %78, %64 %80 = getelementptr inbounds nuw i16, ptr %7, i64 %77 br label %81 @@ -496,43 +497,43 @@ _ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialR %108 = load i8, ptr %107, align 1, !tbaa !111 %109 = zext nneg i8 %108 to i32 %110 = icmp ult i8 %108, 33 - tail call void @llvm.assume(i1 %110) - %111 = icmp samesign uge i32 %105, %109 tail call void @llvm.assume(i1 %111) - %112 = sub nsw i32 %105, %109 - %113 = zext nneg i8 %108 to i64 - %114 = shl i64 %104, %113 - %115 = getelementptr inbounds nuw i8, ptr %107, i64 1 - %116 = load i8, ptr %115, align 1, !tbaa !113 - %117 = icmp eq i8 %116, 0 - br i1 %117, label %_ZN8rawspeed21SamsungV1Decompressor11samsungDiffERNS_14BitStreamerMSBERKSt6vectorINS0_12encTableItemESaIS4_EE.exit.us, label %118 - -118: ; preds = %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit.i.us - %119 = zext nneg i8 %116 to i32 - %120 = icmp samesign ult i32 %112, 65 - tail call void @llvm.assume(i1 %120) - %121 = icmp ult i8 %116, 33 + %112 = icmp samesign uge i32 %105, %109 + tail call void @llvm.assume(i1 %112) + %114 = sub nsw i32 %106, %110 + %115 = zext nneg i8 %109 to i64 + %116 = shl i64 %105, %115 + %117 = getelementptr inbounds nuw i8, ptr %108, i64 1 + %117 = load i8, ptr %117, align 1, !tbaa !113 + %118 = icmp eq i8 %117, 0 + br i1 %118, label %_ZN8rawspeed21SamsungV1Decompressor11samsungDiffERNS_14BitStreamerMSBERKSt6vectorINS0_12encTableItemESaIS4_EE.exit.us, label %119 + +119:; preds = %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit.i.us + %120 = zext nneg i8 %117 to i32 + %121 = icmp samesign ult i32 %113, 65 tail call void @llvm.assume(i1 %121) - %122 = icmp samesign uge i32 %112, %119 + %122 = icmp ult i8 %117, 33 tail call void @llvm.assume(i1 %122) - %123 = sub nuw nsw i32 64, %119 - %124 = zext nneg i32 %123 to i64 - %125 = lshr i64 %114, %124 - %126 = trunc i64 %125 to i32 - %127 = sub nsw i32 %112, %119 - %128 = zext nneg i8 %116 to i64 - %129 = shl i64 %114, %128 - %130 = icmp sgt i64 %114, -1 - %notmask.i.i.us = shl nsw i32 -1, %119 + %123 = icmp samesign uge i32 %114, %120 + tail call void @llvm.assume(i1 %123) + %125 = sub nuw nsw i32 64, %120 + %126 = zext nneg i32 %125 to i64 + %127 = lshr i64 %115, %125 + %128 = trunc i64 %126 to i32 + %129 = sub nsw i32 %114, %120 + %130 = zext nneg i8 %117 to i64 + %notmask.i.i.us = shl i64 %115, %129 + %131 = icmp sgt i64 %115, -1 + %notmask.i.i.us = shl nsw i32 -1, %120 %.neg.i.i.us = add nuw nsw i32 %notmask.i.i.us, 1 - %131 = select i1 %130, i32 %.neg.i.i.us, i32 0 - %.0.i.i.us = add i32 %131, %126 + %131 = select i1 %131, i32 %.neg.i.i.us, i32 0 + %.0.i.i.us = add i32 %131, %127 br label %_ZN8rawspeed21SamsungV1Decompressor11samsungDiffERNS_14BitStreamerMSBERKSt6vectorINS0_12encTableItemESaIS4_EE.exit.us -_ZN8rawspeed21SamsungV1Decompressor11samsungDiffERNS_14BitStreamerMSBERKSt6vectorINS0_12encTableItemESaIS4_EE.exit.us: ; preds = %118, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit.i.us - %.sroa.066.2.us = phi i64 [ %114, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit.i.us ], [ %129, %118 ] - %.sroa.8.2.us = phi i32 [ %112, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit.i.us ], [ %127, %118 ] - %.0.i.us = phi i32 [ 0, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit.i.us ], [ %.0.i.i.us, %118 ] +_ZN8rawspeed21SamsungV1Decompressor11samsungDiffERNS_14BitStreamerMSBERKSt6vectorINS0_12encTableItemESaIS4_EE.exit.us: ; preds = %119, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit.i.us + %.sroa.066.2.us = phi i64 [ %115, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit.i.us ], [ %130, %119 ] + %.sroa.8.2.us = phi i32 [ %113, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit.i.us ], [ %128, %119 ] + %.0.i.us = phi i32 [ 0, %_ZN8rawspeed11BitStreamerINS_14BitStreamerMSBENS_39BitStreamerForwardSequentialReplenisherIS1_EEE4fillEi.exit.i.us ], [ %.0.i.i.us, %119 ] %132 = and i64 %indvars.iv, 1 %133 = getelementptr inbounds nuw [2 x i32], ptr %1, i64 0, i64 %132 %134 = load i32, ptr %133, align 4, !tbaa !16 @@ -547,7 +548,7 @@ _ZN8rawspeed21SamsungV1Decompressor11samsungDiffERNS_14BitStreamerMSBERKSt6vecto %139 = trunc nuw i32 %135 to i16 store i16 %139, ptr %138, align 2, !tbaa !116 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond228.not = icmp eq i64 %indvars.iv.next, %60 + %exitcond228.not = icmp eq i64 %indvars.iv.next, %63 br i1 %exitcond228.not, label %._crit_edge209.us, label %81, !llvm.loop !118 ._crit_edge209.us: ; preds = %137 diff --git a/bench/gromacs/optimized/lmmin.ll b/bench/gromacs/optimized/lmmin.ll index b506d71864f..dd0413ac9bb 100644 --- a/bench/gromacs/optimized/lmmin.ll +++ b/bench/gromacs/optimized/lmmin.ll @@ -2919,8 +2919,9 @@ define internal fastcc void @_ZL8lm_qrfaciiPdPiS_S_S_(i32 noundef range(i32 0, - .preheader224: ; preds = %_Z8lm_enormiPKd.exit, %.thread.i.us %15 = zext nneg i32 %0 to i64 - %16 = add nuw i32 %0, 1 - br label %77 + %16 = zext nneg i32 %1 to i64 + %17 = add nuw i32 %0, 1 + br label %78 .lr.ph.preheader.i: ; preds = %7, %_Z8lm_enormiPKd.exit %indvars.iv = phi i64 [ %indvars.iv.next, %_Z8lm_enormiPKd.exit ], [ 0, %7 ] @@ -3036,14 +3037,14 @@ define internal fastcc void @_ZL8lm_qrfaciiPdPiS_S_S_(i32 noundef range(i32 0, - _Z8lm_enormiPKd.exit: ; preds = %49, %59, %65, %.thread.i %.062.i = phi double [ %54, %49 ], [ %64, %59 ], [ %69, %65 ], [ %71, %.thread.i ] %72 = getelementptr inbounds nuw double, ptr %5, i64 %indvars.iv - store double %.062.i, ptr %72, align 8 - %73 = getelementptr inbounds nuw double, ptr %4, i64 %indvars.iv store double %.062.i, ptr %73, align 8 - %74 = getelementptr inbounds nuw double, ptr %6, i64 %indvars.iv + %74 = getelementptr inbounds nuw double, ptr %4, i64 %indvars.iv store double %.062.i, ptr %74, align 8 - %75 = getelementptr inbounds nuw i32, ptr %3, i64 %indvars.iv - %76 = trunc nuw nsw i64 %indvars.iv to i32 - store i32 %76, ptr %75, align 4 + %75 = getelementptr inbounds nuw double, ptr %6, i64 %indvars.iv + store double %.062.i, ptr %75, align 8 + %76 = getelementptr inbounds nuw i32, ptr %3, i64 %indvars.iv + %77 = trunc nuw nsw i64 %indvars.iv to i32 + store i32 %77, ptr %76, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count254 br i1 %exitcond.not, label %.preheader224, label %.lr.ph.preheader.i, !llvm.loop !62 @@ -3053,7 +3054,7 @@ _Z8lm_enormiPKd.exit: ; preds = %49, %59, %65, %.thr %indvars.iv256 = phi i64 [ 1, %.preheader224 ], [ %indvars.iv.next257, %278 ] %indvars290 = trunc i64 %indvars.iv268 to i32 %indvars.iv.next269 = add nuw nsw i64 %indvars.iv268, 1 - %78 = icmp samesign ult i64 %indvars.iv.next269, %wide.trip.count254 + %78 = icmp samesign ult i64 %indvars.iv.next269, %16 br i1 %78, label %.lr.ph, label %._crit_edge .lr.ph: ; preds = %77, %.lr.ph @@ -3121,7 +3122,7 @@ _Z8lm_enormiPKd.exit: ; preds = %49, %59, %65, %.thr 105: ; preds = %._crit_edge231, %._crit_edge %106 = sub nsw i64 %15, %indvars.iv268 %107 = mul nuw nsw i64 %indvars.iv268, %15 - %108 = mul nuw i32 %16, %indvars290 + %108 = mul nuw i32 %17, %indvars290 %109 = zext nneg i32 %108 to i64 %110 = getelementptr inbounds nuw double, ptr %2, i64 %109 %111 = trunc nsw i64 %106 to i32 @@ -3247,7 +3248,7 @@ _Z8lm_enormiPKd.exit199: ; preds = %145, %155, %161, %. %171 = fcmp olt double %170, 0.000000e+00 %172 = fneg double %.062.i181 %.0165 = select i1 %171, double %172, double %.062.i181 - %173 = icmp samesign ult i64 %indvars.iv268, %wide.trip.count.i + %173 = icmp samesign ult i64 %indvars.iv268, %15 br i1 %173, label %.lr.ph234.preheader, label %._crit_edge235 .lr.ph234.preheader: ; preds = %169 diff --git a/bench/icu/optimized/rematch.ll b/bench/icu/optimized/rematch.ll index a2e71d880b0..569e9dafdf9 100644 --- a/bench/icu/optimized/rematch.ll +++ b/bench/icu/optimized/rematch.ll @@ -16688,8 +16688,7 @@ if.end8.i: ; preds = %if.end6.i if.end13.i: ; preds = %if.end8.i %46 = add nsw i64 %indvars.iv, -1 - %47 = zext i32 %44 to i64 - %cmp5.i.i = icmp samesign ult i64 %46, %47 + %47 = icmp samesign ult i64 %46, %45 br i1 %cmp5.i.i, label %cond.true.i.i196, label %if.end8.i188 if.end8.i188: ; preds = %if.end13.i diff --git a/bench/llvm/optimized/TargetLoweringBase.ll b/bench/llvm/optimized/TargetLoweringBase.ll index 2e948827786..b728f2a5706 100644 --- a/bench/llvm/optimized/TargetLoweringBase.ll +++ b/bench/llvm/optimized/TargetLoweringBase.ll @@ -6127,7 +6127,7 @@ _ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit130: ; preds = %151, %34 %168 = getelementptr inbounds nuw i8, ptr %167, i64 168 %169 = load ptr, ptr %168, align 8 %170 = call noundef zeroext i8 %169(ptr noundef nonnull align 8 dereferenceable(412423) %0, i16 %157) #27 - switch i8 %170, label %340 [ + switch i8 %170, label %342 [ i8 1, label %.preheader304 i8 7, label %.loopexit i8 6, label %.thread271 @@ -6135,8 +6135,9 @@ _ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit130: ; preds = %151, %34 ] .preheader304: ; preds = %160 - %171 = select i1 %spec.select.i.i, i64 169, i64 87 - %.not110318.not = icmp samesign ult i64 %indvars.iv354, %171 + %171 = select i1 %spec.select.i.i, i32 169, i32 87 + %.not110318.not = zext nneg i32 %171 to i64 + %.not110318.not = icmp samesign ult i64 %indvars.iv354, %172 br i1 %.not110318.not, label %.lr.ph320, label %.loopexit .lr.ph320: ; preds = %.preheader304 @@ -6144,23 +6145,24 @@ _ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit130: ; preds = %151, %34 %173 = add nsw i64 %172, -1 %174 = getelementptr inbounds [241 x %"class.llvm::TypeSize"], ptr @_ZZNK4llvm3MVT13getSizeInBitsEvE9SizeTable, i64 0, i64 %173 %.sroa.0.0.copyload.i.i131 = load i64, ptr %174, align 16 - br label %175 + %176 = zext nneg i32 %171 to i64 + br label %177 -175: ; preds = %.lr.ph320, %.critedge +177: ; preds = %.lr.ph320, %.critedge %indvars.iv348 = phi i64 [ %indvars.iv346, %.lr.ph320 ], [ %indvars.iv.next349, %.critedge ] %176 = trunc i64 %indvars.iv348 to i16 %177 = add i16 %176, -17 %spec.select.i.i.i = icmp ult i16 %177, 174 br i1 %spec.select.i.i.i, label %178, label %_ZNK4llvm3MVT19getScalarSizeInBitsEv.exit -178: ; preds = %175 +178: ; preds = %177 %179 = add nsw i64 %indvars.iv348, -1 %180 = getelementptr inbounds [241 x i16], ptr @_ZZNK4llvm3MVT20getVectorElementTypeEvE10EltTyTable, i64 0, i64 %179 %181 = load i16, ptr %180, align 2, !tbaa !138 br label %_ZNK4llvm3MVT19getScalarSizeInBitsEv.exit -_ZNK4llvm3MVT19getScalarSizeInBitsEv.exit: ; preds = %175, %178 - %.sroa.0.0.i.i = phi i16 [ %181, %178 ], [ %176, %175 ] +_ZNK4llvm3MVT19getScalarSizeInBitsEv.exit: ; preds = %177, %178 + %.sroa.0.0.i.i = phi i16 [ %181, %178 ], [ %176, %177 ] %182 = zext i16 %.sroa.0.0.i.i to i64 %183 = add nsw i64 %182, -1 %184 = getelementptr inbounds [241 x %"class.llvm::TypeSize"], ptr @_ZZNK4llvm3MVT13getSizeInBitsEvE9SizeTable, i64 0, i64 %183 @@ -6200,8 +6202,8 @@ _ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit137: ; preds = %186 .critedge: ; preds = %186, %_ZNK4llvm3MVT19getScalarSizeInBitsEv.exit, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit137 %indvars.iv.next349 = add nuw nsw i64 %indvars.iv348, 1 - %.not110.not = icmp samesign ult i64 %indvars.iv348, %171 - br i1 %.not110.not, label %175, label %.loopexit, !llvm.loop !392 + %.not110.not = icmp samesign ult i64 %indvars.iv348, %176 + br i1 %.not110.not, label %177, label %.loopexit, !llvm.loop !392 .loopexit: ; preds = %.critedge, %.preheader304, %160 %199 = call range(i16 0, 17) i16 @llvm.ctpop.i16(i16 %165) @@ -6292,14 +6294,14 @@ _ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit148: ; preds = %_ZN4llvm 232: ; preds = %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit148 %233 = getelementptr inbounds nuw [234 x %"class.llvm::MVT"], ptr %6, i64 0, i64 %indvars.iv354 - store i16 %.sroa.03.0.i268, ptr %233, align 2, !tbaa !138 - %234 = getelementptr inbounds nuw [234 x i8], ptr %152, i64 0, i64 %indvars.iv354 - store i8 7, ptr %234, align 1, !tbaa !84 - %235 = getelementptr inbounds nuw [234 x %"class.llvm::MVT"], ptr %7, i64 0, i64 %indvars.iv354 store i16 %.sroa.03.0.i268, ptr %235, align 2, !tbaa !138 - %236 = getelementptr inbounds nuw [234 x i16], ptr %5, i64 0, i64 %indvars.iv354 - store i16 1, ptr %236, align 2, !tbaa !137 - br label %341 + %236 = getelementptr inbounds nuw [234 x i8], ptr %152, i64 0, i64 %indvars.iv354 + store i8 7, ptr %236, align 1, !tbaa !84 + %237 = getelementptr inbounds nuw [234 x %"class.llvm::MVT"], ptr %7, i64 0, i64 %indvars.iv354 + store i16 %.sroa.03.0.i268, ptr %237, align 2, !tbaa !138 + %238 = getelementptr inbounds nuw [234 x i16], ptr %5, i64 0, i64 %indvars.iv354 + store i16 1, ptr %238, align 2, !tbaa !137 + br label %343 .thread271: ; preds = %.critedge3, %.preheader303, %_ZNK4llvm3MVT17getPow2VectorTypeEv.exit, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit148, %160, %160 %237 = add nsw i16 %157, -191 @@ -6376,18 +6378,18 @@ _ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.thread.i: ; preds = %_Z .critedge.i: ; preds = %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.thread.i, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.i, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.thread.us.i, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.us.i, %241 %.sroa.052.sroa.0.1.lcssa.i = phi i32 [ %.sroa.052.sroa.0.0.i, %241 ], [ %.sroa.052.sroa.0.188.us.i, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.us.i ], [ %247, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.thread.us.i ], [ %.sroa.052.sroa.0.188.i, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.i ], [ %254, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.thread.i ] %.1.lcssa.i = phi i16 [ %.025.i, %241 ], [ %.189.us.i, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.us.i ], [ %248, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.thread.us.i ], [ %.189.i, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.i ], [ %255, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit.thread.i ] - br i1 %narrow.not.i, label %259, label %257 + br i1 %narrow.not.i, label %261, label %259 -257: ; preds = %.critedge.i - %258 = call i16 @_ZN4llvm3MVT19getScalableVectorVTES0_j(i16 %163, i32 noundef %.sroa.052.sroa.0.1.lcssa.i) +259: ; preds = %.critedge.i + %260 = call i16 @_ZN4llvm3MVT19getScalableVectorVTES0_j(i16 %163, i32 noundef %.sroa.052.sroa.0.1.lcssa.i) br label %_ZN4llvm3MVT11getVectorVTES0_NS_12ElementCountE.exit35.i -259: ; preds = %.critedge.i - %260 = call i16 @_ZN4llvm3MVT11getVectorVTES0_j(i16 %163, i32 noundef %.sroa.052.sroa.0.1.lcssa.i) +261: ; preds = %.critedge.i + %262 = call i16 @_ZN4llvm3MVT11getVectorVTES0_j(i16 %163, i32 noundef %.sroa.052.sroa.0.1.lcssa.i) br label %_ZN4llvm3MVT11getVectorVTES0_NS_12ElementCountE.exit35.i -_ZN4llvm3MVT11getVectorVTES0_NS_12ElementCountE.exit35.i: ; preds = %259, %257 - %.sroa.04.0.i34.i = phi i16 [ %258, %257 ], [ %260, %259 ] +_ZN4llvm3MVT11getVectorVTES0_NS_12ElementCountE.exit35.i: ; preds = %261, %259 + %.sroa.04.0.i34.i = phi i16 [ %260, %259 ], [ %262, %261 ] %.not.i36.i = icmp eq i16 %.sroa.04.0.i34.i, 0 br i1 %.not.i36.i, label %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit37.thread.i, label %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit37.i @@ -6563,52 +6565,52 @@ _ZL25getVectorTypeBreakdownMVTN4llvm3MVTERS0_RjS1_PNS_18TargetLoweringBaseE.exit _ZNK4llvm3MVT17getPow2VectorTypeEv.exit156: ; preds = %320, %322 %.sroa.03.0.i155 = phi i16 [ %321, %320 ], [ %323, %322 ] %324 = icmp eq i16 %.sroa.03.0.i155, %157 - br i1 %324, label %_ZNK4llvm3MVT17getPow2VectorTypeEv.exit156.thread, label %337 + br i1 %324, label %_ZNK4llvm3MVT17getPow2VectorTypeEv.exit156.thread, label %339 _ZNK4llvm3MVT17getPow2VectorTypeEv.exit156.thread: ; preds = %_ZL25getVectorTypeBreakdownMVTN4llvm3MVTERS0_RjS1_PNS_18TargetLoweringBaseE.exit, %_ZNK4llvm3MVT17getPow2VectorTypeEv.exit156 %325 = getelementptr inbounds nuw [234 x %"class.llvm::MVT"], ptr %6, i64 0, i64 %indvars.iv354 store i16 1, ptr %325, align 2, !tbaa !138 - switch i8 %170, label %330 [ - i8 5, label %326 - i8 6, label %328 + switch i8 %170, label %332 [ + i8 5, label %328 + i8 6, label %330 ] -326: ; preds = %_ZNK4llvm3MVT17getPow2VectorTypeEv.exit156.thread - %327 = getelementptr inbounds nuw [234 x i8], ptr %152, i64 0, i64 %indvars.iv354 - store i8 5, ptr %327, align 1, !tbaa !84 - br label %341 - 328: ; preds = %_ZNK4llvm3MVT17getPow2VectorTypeEv.exit156.thread %329 = getelementptr inbounds nuw [234 x i8], ptr %152, i64 0, i64 %indvars.iv354 - store i8 6, ptr %329, align 1, !tbaa !84 + store i8 5, ptr %329, align 1, !tbaa !84 br label %341 330: ; preds = %_ZNK4llvm3MVT17getPow2VectorTypeEv.exit156.thread - %331 = icmp ugt i16 %165, 1 - br i1 %331, label %332, label %334 + %331 = getelementptr inbounds nuw [234 x i8], ptr %152, i64 0, i64 %indvars.iv354 + store i8 6, ptr %331, align 1, !tbaa !84 + br label %343 -332: ; preds = %330 - %333 = getelementptr inbounds nuw [234 x i8], ptr %152, i64 0, i64 %indvars.iv354 - store i8 6, ptr %333, align 1, !tbaa !84 - br label %341 +332: ; preds = %_ZNK4llvm3MVT17getPow2VectorTypeEv.exit156.thread + %333 = icmp ugt i16 %165, 1 + br i1 %333, label %334, label %336 -334: ; preds = %330 - %335 = select i1 %spec.select.i.i, i8 10, i8 5 - %336 = getelementptr inbounds nuw [234 x i8], ptr %152, i64 0, i64 %indvars.iv354 - store i8 %335, ptr %336, align 1, !tbaa !84 - br label %341 +334: ; preds = %332 + %335 = getelementptr inbounds nuw [234 x i8], ptr %152, i64 0, i64 %indvars.iv354 + store i8 6, ptr %335, align 1, !tbaa !84 + br label %343 -337: ; preds = %_ZNK4llvm3MVT17getPow2VectorTypeEv.exit156 - %338 = getelementptr inbounds nuw [234 x %"class.llvm::MVT"], ptr %6, i64 0, i64 %indvars.iv354 - store i16 %.sroa.03.0.i155, ptr %338, align 2, !tbaa !138 - %339 = getelementptr inbounds nuw [234 x i8], ptr %152, i64 0, i64 %indvars.iv354 - store i8 7, ptr %339, align 1, !tbaa !84 - br label %341 +336: ; preds = %332 + %337 = select i1 %spec.select.i.i, i8 10, i8 5 + %338 = getelementptr inbounds nuw [234 x i8], ptr %152, i64 0, i64 %indvars.iv354 + store i8 %337, ptr %338, align 1, !tbaa !84 + br label %343 + +339:; preds = %_ZNK4llvm3MVT17getPow2VectorTypeEv.exit156 + %340 = getelementptr inbounds nuw [234 x %"class.llvm::MVT"], ptr %6, i64 0, i64 %indvars.iv354 + store i16 %.sroa.03.0.i155, ptr %340, align 2, !tbaa !138 + %341 = getelementptr inbounds nuw [234 x i8], ptr %152, i64 0, i64 %indvars.iv354 + store i8 7, ptr %341, align 1, !tbaa !84 + br label %343 -340: ; preds = %160 +342: ; preds = %160 unreachable -341: ; preds = %.thread260, %.thread264, %232, %326, %332, %334, %328, %337, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit130 +341: ; preds = %.thread260, %.thread264, %232, %328, %334, %336, %330, %339, %_ZNK4llvm18TargetLoweringBase11isTypeLegalENS_3EVTE.exit130 %indvars.iv.next355 = add nuw nsw i64 %indvars.iv354, 1 %indvars.iv.next347 = add nuw nsw i64 %indvars.iv346, 1 %exitcond363.not = icmp eq i64 %indvars.iv.next355, 191 @@ -6623,7 +6625,7 @@ _ZNK4llvm3MVT17getPow2VectorTypeEv.exit156.thread: ; preds = %_ZL25getVectorType %345 = load ptr, ptr %0, align 8, !tbaa !3 %346 = getelementptr inbounds nuw i8, ptr %345, i64 1264 %347 = load ptr, ptr %346, align 8 - %348 = call { ptr, i8 } %347(ptr noundef nonnull align 8 dereferenceable(412423) %0, ptr noundef %1, i16 %344) #27 + %348 = call { ptr, i8 } %349(ptr noundef nonnull align 8 dereferenceable(412423) %0, ptr noundef %1, i16 %344) #27 %.fca.0.extract = extractvalue { ptr, i8 } %348, 0 %.fca.1.extract = extractvalue { ptr, i8 } %348, 1 %349 = getelementptr inbounds nuw [234 x ptr], ptr %155, i64 0, i64 %indvars.iv364 diff --git a/bench/luajit/optimized/minilua.ll b/bench/luajit/optimized/minilua.ll index 6d95cbdea51..b0cf9b0b447 100644 --- a/bench/luajit/optimized/minilua.ll +++ b/bench/luajit/optimized/minilua.ll @@ -35167,12 +35167,11 @@ sw.default.i.i: ; preds = %if.else9.i.i %func18.i.i = getelementptr inbounds nuw i8, ptr %67, i64 8 %68 = load ptr, ptr %func18.i.i, align 8 %69 = load ptr, ptr %68, align 8 + %70 = add nsw i64 %indvars.iv, -10001 %nupvalues.i.i = getelementptr inbounds nuw i8, ptr %69, i64 11 %70 = load i8, ptr %nupvalues.i.i, align 1 - %conv.i.i145 = zext i8 %70 to i32 - %71 = trunc i64 %indvars.iv to i32 - %72 = add i32 %71, -10001 - %cmp21.not.i.i = icmp samesign ugt i32 %72, %conv.i.i145 + %conv.i.i145 = zext i8 %70 to i64 + %71 = icmp samesign ugt i64 %70, %conv.i.i145 %upvalue.i.i = getelementptr inbounds nuw i8, ptr %69, i64 40 %73 = add nsw i64 %indvars.iv, -10002 %arrayidx.i.i = getelementptr inbounds nuw [1 x %struct.lua_TValue], ptr %upvalue.i.i, i64 0, i64 %73 @@ -43370,12 +43369,11 @@ sw.default.i.i: ; preds = %if.else9.i.i %func18.i.i = getelementptr inbounds nuw i8, ptr %48, i64 8 %49 = load ptr, ptr %func18.i.i, align 8 %50 = load ptr, ptr %49, align 8 + %51 = add nsw i64 %indvars.iv, -10001 %nupvalues.i.i = getelementptr inbounds nuw i8, ptr %50, i64 11 %51 = load i8, ptr %nupvalues.i.i, align 1 - %conv.i.i53 = zext i8 %51 to i32 - %52 = trunc i64 %indvars.iv to i32 - %53 = add i32 %52, -10001 - %cmp21.not.i.i = icmp samesign ugt i32 %53, %conv.i.i53 + %conv.i.i53 = zext i8 %51 to i64 + %52 = icmp samesign ugt i64 %51, %conv.i.i53 %upvalue.i.i = getelementptr inbounds nuw i8, ptr %50, i64 40 %54 = add nsw i64 %indvars.iv, -10002 %arrayidx.i.i = getelementptr inbounds nuw [1 x %struct.lua_TValue], ptr %upvalue.i.i, i64 0, i64 %54 diff --git a/bench/miniaudio/optimized/unity.ll b/bench/miniaudio/optimized/unity.ll index f560da2c0a0..faf42fd5424 100644 --- a/bench/miniaudio/optimized/unity.ll +++ b/bench/miniaudio/optimized/unity.ll @@ -107215,11 +107215,7 @@ define internal fastcc void @ma_dr_mp3d_synth_granule(ptr noundef captures(none) entry: %t.i = alloca [4 x [8 x <4 x float>]], align 16 %cmp39 = icmp sgt i32 %nch, 0 - br i1 %cmp39, label %for.body.lr.ph, label %entry.for.end_crit_edge - -entry.for.end_crit_edge: ; preds = %entry - %.pre = zext nneg i32 %nbands to i64 - br label %for.end + br i1 %cmp39, label %for.body.lr.ph, label %for.end for.body.lr.ph: ; preds = %entry %sub137.i = add nsw i32 %nbands, -3 @@ -107490,8 +107486,7 @@ ma_dr_mp3d_DCT_II.exit: ; preds = %for.inc249.i %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !875 -for.end: ; preds = %ma_dr_mp3d_DCT_II.exit, %entry.for.end_crit_edge - %.pre-phi = phi i64 [ %.pre, %entry.for.end_crit_edge ], [ %1, %ma_dr_mp3d_DCT_II.exit ] +for.end: ; preds = %ma_dr_mp3d_DCT_II.exit, %entry tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(3840) %lins, ptr noundef nonnull align 4 dereferenceable(3840) %qmf_state, i64 3840, i1 false) %mul6 = shl nsw i32 %nch, 5 %sub.i = add nsw i32 %nch, -1 @@ -107500,6 +107495,7 @@ for.end: ; preds = %ma_dr_mp3d_DCT_II.e %idx.ext2.i = sext i32 %sub.i to i64 %idx.ext23.i = sext i32 %mul6 to i64 %44 = sext i32 %nch to i64 + %45 = zext nneg i32 %nbands to i64 br label %for.body3 for.body3: ; preds = %for.end, %ma_dr_mp3d_synth.exit @@ -107802,7 +107798,7 @@ for.body.i27: ; preds = %for.body.i27, %for. ma_dr_mp3d_synth.exit: ; preds = %for.body.i27 %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 2 - %cmp2 = icmp samesign ult i64 %indvars.iv.next46, %.pre-phi + %cmp2 = icmp samesign ult i64 %indvars.iv.next46, %45 br i1 %cmp2, label %for.body3, label %for.end14, !llvm.loop !877 for.end14: ; preds = %ma_dr_mp3d_synth.exit diff --git a/bench/opencv/optimized/fast_window_binarizer.ll b/bench/opencv/optimized/fast_window_binarizer.ll index 54ebe0ca4c8..1fca2986671 100644 --- a/bench/opencv/optimized/fast_window_binarizer.ll +++ b/bench/opencv/optimized/fast_window_binarizer.ll @@ -736,18 +736,20 @@ define hidden void @_ZN5zxing19FastWindowBinarizer15calcBlockTotalsEPiS1_ii(ptr br i1 %7, label %.lr.ph36.us.preheader, label %._crit_edge .lr.ph36.us.preheader: ; preds = %.lr.ph - %9 = zext nneg i32 %3 to i64 %wide.trip.count60 = zext nneg i32 %4 to i64 + %wide.trip.count = zext nneg i32 %3 to i64 br label %.lr.ph.us.us.preheader .lr.ph.us.us.preheader: ; preds = %..loopexit_crit_edge.us, %.lr.ph36.us.preheader %indvars.iv57 = phi i64 [ 0, %.lr.ph36.us.preheader ], [ %indvars.iv.next58, %..loopexit_crit_edge.us ] %indvars.iv43 = phi i32 [ 0, %.lr.ph36.us.preheader ], [ %indvars.iv.next44, %..loopexit_crit_edge.us ] - %10 = zext i32 %indvars.iv43 to i64 + %10 = sext i32 %indvars.iv43 to i64 %indvars.iv.next58 = add nuw nsw i64 %indvars.iv57, 1 %11 = mul nuw nsw i64 %indvars.iv.next58, 6 - %12 = mul nuw nsw i64 %indvars.iv57, %9 - %invariant.gep = getelementptr inbounds nuw i32, ptr %2, i64 %12 + %12 = trunc i64 %indvars.iv57 to i32 + %invariant.gep = mul i32 %3, %12 + %13 = zext i32 %12 to i64 + %invariant.gep = getelementptr inbounds nuw i32, ptr %2, i64 %13 br label %.lr.ph.us.us ..loopexit_crit_edge.us: ; preds = %._crit_edge.us.us @@ -778,11 +780,10 @@ define hidden void @_ZN5zxing19FastWindowBinarizer15calcBlockTotalsEPiS1_ii(ptr br label %27 24: ; preds = %27 - %indvars.iv.next51 = add i64 %indvars.iv50, 1 - %25 = and i64 %indvars.iv.next51, 4294967295 - %26 = icmp samesign ult i64 %25, %11 - %indvars.iv.next46 = add i32 %indvars.iv45, %13 - br i1 %26, label %19, label %._crit_edge.us.us, !llvm.loop !6 + %indvars.iv.next51 = add nuw nsw i64 %indvars.iv50, 1 + %25 = icmp samesign ult i64 %indvars.iv.next51, %10 + %26 = add i32 %indvars.iv45, %14 + br i1 %25, label %19, label %._crit_edge.us.us, !llvm.loop !6 27: ; preds = %27, %19 %indvars.iv47 = phi i64 [ %indvars.iv.next48, %27 ], [ %20, %19 ] @@ -799,7 +800,7 @@ define hidden void @_ZN5zxing19FastWindowBinarizer15calcBlockTotalsEPiS1_ii(ptr store i32 %30, ptr %gep, align 4 %indvars.iv.next54 = add nuw nsw i64 %indvars.iv53, 1 %indvars.iv.next = add nuw i32 %indvars.iv, 6 - %exitcond.not = icmp eq i64 %indvars.iv.next54, %9 + %exitcond.not = icmp eq i64 %indvars.iv.next54, %wide.trip.count br i1 %exitcond.not, label %..loopexit_crit_edge.us, label %.lr.ph.us.us, !llvm.loop !8 ._crit_edge: ; preds = %..loopexit_crit_edge.us, %.lr.ph, %5 @@ -1479,18 +1480,20 @@ _ZN5zxing3RefINS_15LuminanceSourceEED2Ev.exit: ; preds = %2, %7, %12 %60 = load ptr, ptr %59, align 8 %61 = getelementptr inbounds nuw i8, ptr %0, i64 152 %62 = load ptr, ptr %61, align 8 - %63 = zext nneg i32 %31 to i64 %wide.trip.count60.i = zext nneg i32 %32 to i64 + %wide.trip.count.i = zext nneg i32 %31 to i64 br label %.lr.ph.us.us.preheader.i .lr.ph.us.us.preheader.i: ; preds = %..loopexit_crit_edge.us.i, %.lr.ph.i %indvars.iv57.i = phi i64 [ 0, %.lr.ph.i ], [ %indvars.iv.next58.i, %..loopexit_crit_edge.us.i ] %indvars.iv43.i = phi i32 [ 0, %.lr.ph.i ], [ %indvars.iv.next44.i, %..loopexit_crit_edge.us.i ] - %64 = zext i32 %indvars.iv43.i to i64 + %64 = sext i32 %indvars.iv43.i to i64 %indvars.iv.next58.i = add nuw nsw i64 %indvars.iv57.i, 1 %65 = mul nuw nsw i64 %indvars.iv.next58.i, 6 - %66 = mul nuw nsw i64 %indvars.iv57.i, %63 - %invariant.gep.i = getelementptr inbounds nuw i32, ptr %62, i64 %66 + %66 = trunc i64 %indvars.iv57.i to i32 + %invariant.gep.i = mul i32 %31, %66 + %67 = zext i32 %66 to i64 + %invariant.gep.i = getelementptr inbounds nuw i32, ptr %62, i64 %67 br label %.lr.ph.us.us.i ..loopexit_crit_edge.us.i: ; preds = %._crit_edge.us.us.i @@ -1521,11 +1524,10 @@ _ZN5zxing3RefINS_15LuminanceSourceEED2Ev.exit: ; preds = %2, %7, %12 br label %81 78: ; preds = %81 - %indvars.iv.next51.i = add i64 %indvars.iv50.i, 1 - %79 = and i64 %indvars.iv.next51.i, 4294967295 - %80 = icmp samesign ult i64 %79, %65 - %indvars.iv.next46.i = add i32 %indvars.iv45.i, %67 - br i1 %80, label %73, label %._crit_edge.us.us.i, !llvm.loop !6 + %indvars.iv.next51.i = add nuw nsw i64 %indvars.iv50.i, 1 + %79 = icmp samesign ult i64 %indvars.iv.next51.i, %65 + %80 = add i32 %indvars.iv45.i, %68 + br i1 %80, label %74, label %._crit_edge.us.us.i, !llvm.loop !6 81: ; preds = %81, %73 %indvars.iv47.i = phi i64 [ %indvars.iv.next48.i, %81 ], [ %74, %73 ] @@ -1542,7 +1544,7 @@ _ZN5zxing3RefINS_15LuminanceSourceEED2Ev.exit: ; preds = %2, %7, %12 store i32 %84, ptr %gep.i, align 4 %indvars.iv.next54.i = add nuw nsw i64 %indvars.iv53.i, 1 %indvars.iv.next.i = add nuw i32 %indvars.iv.i, 6 - %exitcond.not.i = icmp eq i64 %indvars.iv.next54.i, %63 + %exitcond.not.i = icmp eq i64 %indvars.iv.next54.i, %wide.trip.count.i br i1 %exitcond.not.i, label %..loopexit_crit_edge.us.i, label %.lr.ph.us.us.i, !llvm.loop !8 _ZN5zxing19FastWindowBinarizer15calcBlockTotalsEPiS1_ii.exit: ; preds = %..loopexit_crit_edge.us.i @@ -1558,7 +1560,7 @@ _ZN5zxing19FastWindowBinarizer15calcBlockTotalsEPiS1_ii.exit: ; preds = %..loope %91 = load ptr, ptr %89, align 8 %92 = mul nuw nsw i64 %indvars.iv66.i, %90 %93 = getelementptr inbounds nuw i32, ptr %91, i64 %92 - %94 = mul nuw nsw i64 %indvars.iv66.i, %63 + %94 = mul nuw nsw i64 %indvars.iv66.i, %wide.trip.count.i %95 = getelementptr inbounds nuw i32, ptr %86, i64 %94 store i32 0, ptr %93, align 4 br label %96 @@ -1572,7 +1574,7 @@ _ZN5zxing19FastWindowBinarizer15calcBlockTotalsEPiS1_ii.exit: ; preds = %..loope %indvars.iv.next62.i = add nuw nsw i64 %indvars.iv61.i, 1 %100 = getelementptr inbounds nuw i32, ptr %93, i64 %indvars.iv.next62.i store i32 %99, ptr %100, align 4 - %exitcond65.not.i = icmp eq i64 %indvars.iv.next62.i, %63 + %exitcond65.not.i = icmp eq i64 %indvars.iv.next62.i, %wide.trip.count.i br i1 %exitcond65.not.i, label %._crit_edge.us.i, label %96, !llvm.loop !9 ._crit_edge.us.i: ; preds = %96 diff --git a/bench/opencv/optimized/resize.ll b/bench/opencv/optimized/resize.ll index 09680265af5..372abcffedd 100644 --- a/bench/opencv/optimized/resize.ll +++ b/bench/opencv/optimized/resize.ll @@ -1722,6 +1722,7 @@ _ZN2cv10AutoBufferIiLm264EED2Ev.exit469: ; preds = %411, %406, %375 %473 = zext nneg i32 %factor.op.mul546 to i64 %wide.trip.count = zext nneg i32 %73 to i64 %474 = icmp samesign ult i32 %.0376, %factor.op.mul546 + %wide.trip.count594 = zext nneg i32 %factor.op.mul546 to i64 %475 = icmp samesign ult i32 %.0376, %factor.op.mul546 br label %486 @@ -2024,7 +2025,7 @@ _ZN2cvL19interpolateLanczos4EfPf.exit: ; preds = %596, %600, %547 %623 = getelementptr inbounds nuw float, ptr %457, i64 %619 store float %622, ptr %623, align 4 %indvars.iv.next592 = add nuw nsw i64 %indvars.iv591, 1 - %exitcond595.not = icmp eq i64 %indvars.iv.next592, %473 + %exitcond595.not = icmp eq i64 %indvars.iv.next592, %wide.trip.count594 br i1 %exitcond595.not, label %.loopexit531, label %.lr.ph, !llvm.loop !17 .loopexit531: ; preds = %.lr.ph, %.lr.ph545, %.preheader535, %.preheader530 diff --git a/bench/openjdk/optimized/sharedRuntimeTrig.ll b/bench/openjdk/optimized/sharedRuntimeTrig.ll index 2517d1a4e7e..516c18c3408 100644 --- a/bench/openjdk/optimized/sharedRuntimeTrig.ll +++ b/bench/openjdk/optimized/sharedRuntimeTrig.ll @@ -876,7 +876,8 @@ _ZL7scalbnAdi.exit285.i: ; preds = %263 %278 = trunc nuw nsw i64 %indvars.iv124.i to i32 %279 = add nuw nsw i32 %.0231.i, %278 %280 = sext i32 %.0231.i to i64 - br label %281 + %281 = sext i32 %279 to i64 + br label %282 281: ; preds = %._crit_edge52.i, %.preheader12.i %indvars.iv132.i = phi i64 [ %280, %.preheader12.i ], [ %indvars.iv.next133.i, %._crit_edge52.i ] @@ -909,9 +910,8 @@ _ZL7scalbnAdi.exit285.i: ; preds = %263 %.1.lcssa.i = phi double [ 0.000000e+00, %281 ], [ %293, %.lr.ph51.i ] %294 = getelementptr inbounds nuw [20 x double], ptr %6, i64 0, i64 %indvars.iv.next133.i store double %.1.lcssa.i, ptr %294, align 8 - %295 = trunc nsw i64 %indvars.iv.next133.i to i32 - %.not266.not.i = icmp samesign ugt i32 %279, %295 - br i1 %.not266.not.i, label %281, label %.loopexit13.loopexit.i, !llvm.loop !12 + %295 = icmp samesign ult i64 %indvars.iv.next133.i, %281 + br i1 %.not266.not.i, label %282, label %.loopexit13.loopexit.i, !llvm.loop !12 296: ; preds = %296, %.preheader11.i %indvars.iv137.i = phi i64 [ %273, %.preheader11.i ], [ %indvars.iv.next138.i, %296 ] diff --git a/bench/openjdk/optimized/sharedRuntime_x86_64.ll b/bench/openjdk/optimized/sharedRuntime_x86_64.ll index 9644028e03a..296b9a47a01 100644 --- a/bench/openjdk/optimized/sharedRuntime_x86_64.ll +++ b/bench/openjdk/optimized/sharedRuntime_x86_64.ll @@ -8035,9 +8035,10 @@ _ZL13reverse_wordsPmS_i.exit36: ; preds = %.lr.ph.i32 br i1 %exitcond.not, label %._crit_edge.i, label %.lr.ph.i38, !llvm.loop !53 ._crit_edge.i: ; preds = %.lr.ph.i38 - %57 = and i64 %indvars.iv179.i, 1 - %58 = icmp eq i64 %57, 0 - br i1 %58, label %._crit_edge.i.._crit_edge.thread.i_crit_edge, label %._crit_edge._crit_edge.i + %57 = trunc nuw nsw i64 %umax to i32 + %58 = and i64 %indvars.iv179.i, 1 + %59 = icmp eq i64 %58, 0 + br i1 %59, label %._crit_edge.i.._crit_edge.thread.i_crit_edge, label %67 ._crit_edge.i.._crit_edge.thread.i_crit_edge: ; preds = %._crit_edge.i %.pre = load i64, ptr %6, align 8 @@ -8047,41 +8048,47 @@ _ZL13reverse_wordsPmS_i.exit36: ; preds = %.lr.ph.i32 %59 = phi i64 [ %.pre, %._crit_edge.i.._crit_edge.thread.i_crit_edge ], [ 0, %.lr.ph134.i ] %.1.lcssa204.i = phi i64 [ %55, %._crit_edge.i.._crit_edge.thread.i_crit_edge ], [ %.0132.i, %.lr.ph134.i ] %.1102.lcssa203.i = phi i64 [ %56, %._crit_edge.i.._crit_edge.thread.i_crit_edge ], [ %.0101131.i, %.lr.ph134.i ] - %.0111.lcssa201.i = phi i64 [ %umax, %._crit_edge.i.._crit_edge.thread.i_crit_edge ], [ 0, %.lr.ph134.i ] - %60 = getelementptr inbounds nuw i64, ptr %15, i64 %.0111.lcssa201.i - %61 = load i64, ptr %60, align 8 - %62 = call { i64, i64, i64, i64 } asm "mul $5; add %rax, $2; adc %rdx, $3; adc $$0, $4", "=&{dx},={ax},=r,=r,=*imr,r,{ax},2,3,4,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i64) %6, i64 %61, i64 %61, i64 %.1.lcssa204.i, i64 %.1102.lcssa203.i, i64 %59) #14, !srcloc !54 - %63 = extractvalue { i64, i64, i64, i64 } %62, 2 - %64 = extractvalue { i64, i64, i64, i64 } %62, 3 - br label %._crit_edge._crit_edge.i - -._crit_edge._crit_edge.i: ; preds = %._crit_edge.thread.i, %._crit_edge.i - %.0111.lcssa202.i = phi i64 [ %.0111.lcssa201.i, %._crit_edge.thread.i ], [ %umax, %._crit_edge.i ] - %.2103.i = phi i64 [ %64, %._crit_edge.thread.i ], [ %56, %._crit_edge.i ] - %.2.i = phi i64 [ %63, %._crit_edge.thread.i ], [ %55, %._crit_edge.i ] - %65 = icmp samesign ult i64 %.0111.lcssa202.i, %indvars.iv179.i - br i1 %65, label %.lr.ph126.i, label %._crit_edge127.i - -.lr.ph126.i: ; preds = %._crit_edge._crit_edge.i, %.lr.ph126.i - %indvars.iv176.i = phi i64 [ %indvars.iv.next177.i, %.lr.ph126.i ], [ %.0111.lcssa202.i, %._crit_edge._crit_edge.i ] - %.3124.i = phi i64 [ %73, %.lr.ph126.i ], [ %.2.i, %._crit_edge._crit_edge.i ] - %.3104123.i = phi i64 [ %74, %.lr.ph126.i ], [ %.2103.i, %._crit_edge._crit_edge.i ] - %66 = load i64, ptr %6, align 8 - %67 = getelementptr inbounds nuw i64, ptr %19, i64 %indvars.iv176.i - %68 = load i64, ptr %67, align 8 - %69 = sub nuw nsw i64 %indvars.iv179.i, %indvars.iv176.i - %70 = getelementptr inbounds nuw i64, ptr %16, i64 %69 - %71 = load i64, ptr %70, align 8 - %72 = call { i64, i64, i64, i64 } asm "mul $5; add %rax, $2; adc %rdx, $3; adc $$0, $4", "=&{dx},={ax},=r,=r,=*imr,r,{ax},2,3,4,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i64) %6, i64 %68, i64 %71, i64 %.3124.i, i64 %.3104123.i, i64 %66) #14, !srcloc !55 - %73 = extractvalue { i64, i64, i64, i64 } %72, 2 - %74 = extractvalue { i64, i64, i64, i64 } %72, 3 + %.0111.lcssa201.i = phi i32 [ %57, %._crit_edge.i.._crit_edge.thread.i_crit_edge ], [ 0, %.lr.ph134.i ] + %60 = zext nneg i32 %.0111.lcssa201.i to i64 + %61 = getelementptr inbounds nuw i64, ptr %15, i64 %61 + %63 = load i64, ptr %62, align 8 + %63 = call { i64, i64, i64, i64 } asm "mul $5; add %rax, $2; adc %rdx, $3; adc $$0, $4", "=&{dx},={ax},=r,=r,=*imr,r,{ax},2,3,4,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i64) %6, i64 %62, i64 %63, i64 %.1.lcssa204.i, i64 %.1102.lcssa203.i, i64 %60) #14, !srcloc !54 + %64 = extractvalue { i64, i64, i64, i64 } %64, 2 + %66 = extractvalue { i64, i64, i64, i64 } %64, 3 + br label %67 + +67: ; preds = %._crit_edge.thread.i, %._crit_edge.i + %.2103.i = phi i32 [ %.0111.lcssa201.i, %._crit_edge.thread.i ], [ %57, %._crit_edge.i ] + %.2.i = phi i64 [ %66, %._crit_edge.thread.i ], [ %56, %._crit_edge.i ] + %65 = phi i64 [ %65, %._crit_edge.thread.i ], [ %55, %._crit_edge.i ] + %68 = zext nneg i32 %.2.i to i64 + %69 = icmp samesign ugt i64 %indvars.iv179.i, %68 + br i1 %69, label %.lr.ph126.preheader.i, label %._crit_edge127.i + +.lr.ph126.preheader.i:; preds = %67 + %.3104123.i = zext nneg i32 %.2103.i to i64 + br label %.lr.ph126.i + +.lr.ph126.i:; preds = %.lr.ph126.i, %.lr.ph126.preheader.i + %69 = phi i64 [ %70, %.lr.ph126.preheader.i ], [ %indvars.iv.next177.i, %.lr.ph126.i ] + %70 = phi i64 [ %65, %.lr.ph126.preheader.i ], [ %78, %.lr.ph126.i ] + %71 = phi i64 [ %65, %.lr.ph126.preheader.i ], [ %79, %.lr.ph126.i ] + %72 = load i64, ptr %6, align 8 + %73 = getelementptr inbounds nuw i64, ptr %19, i64 %69 + %74 = load i64, ptr %72, align 8 + %74 = sub nuw nsw i64 %indvars.iv179.i, %indvars.iv176.i + %75 = getelementptr inbounds nuw i64, ptr %16, i64 %74 + %76 = load i64, ptr %75, align 8 + %77 = call { i64, i64, i64, i64 } asm "mul $5; add %rax, $2; adc %rdx, $3; adc $$0, $4", "=&{dx},={ax},=r,=r,=*imr,r,{ax},2,3,4,~{cc},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i64) %6, i64 %73, i64 %76, i64 %.3124.i, i64 %.3104123.i, i64 %71) #14, !srcloc !55 + %78 = extractvalue { i64, i64, i64, i64 } %77, 2 + %79 = extractvalue { i64, i64, i64, i64 } %77, 3 %indvars.iv.next177.i = add nuw nsw i64 %indvars.iv176.i, 1 %exitcond.not.i = icmp eq i64 %indvars.iv.next177.i, %indvars.iv179.i br i1 %exitcond.not.i, label %._crit_edge127.i, label %.lr.ph126.i, !llvm.loop !56 -._crit_edge127.i: ; preds = %.lr.ph126.i, %._crit_edge._crit_edge.i - %.3104.lcssa.i = phi i64 [ %.2103.i, %._crit_edge._crit_edge.i ], [ %74, %.lr.ph126.i ] - %.3.lcssa.i = phi i64 [ %.2.i, %._crit_edge._crit_edge.i ], [ %73, %.lr.ph126.i ] +._crit_edge127.i: ; preds = %.lr.ph126.i, %67 + %.3104.lcssa.i = phi i64 [ %.2103.i, %67 ], [ %79, %.lr.ph126.i ] + %.3.lcssa.i = phi i64 [ %.2.i, %67 ], [ %78, %.lr.ph126.i ] %75 = mul i64 %.3.lcssa.i, %3 %76 = getelementptr inbounds nuw i64, ptr %19, i64 %indvars.iv179.i store i64 %75, ptr %76, align 8 diff --git a/bench/openssl/optimized/libcrypto-lib-rsa_pk1.ll b/bench/openssl/optimized/libcrypto-lib-rsa_pk1.ll index 9a5b32da658..3d6ffbe5480 100644 --- a/bench/openssl/optimized/libcrypto-lib-rsa_pk1.ll +++ b/bench/openssl/optimized/libcrypto-lib-rsa_pk1.ll @@ -713,7 +713,6 @@ for.cond.preheader: ; preds = %if.end16 %arrayidx37 = getelementptr inbounds nuw i8, ptr %be_iter, i64 1 %conv43 = zext nneg i32 %llen to i64 %0 = zext nneg i32 %tlen to i64 - %zext = zext nneg i32 %tlen to i64 br label %for.body if.then20: ; preds = %if.end16 @@ -776,7 +775,7 @@ if.then53: ; preds = %if.end48 if.end54: ; preds = %if.end48 store i32 32, ptr %md_len, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32 - %1 = icmp samesign ugt i64 %indvars.iv.next, %zext + %1 = icmp samesign ugt i64 %indvars.iv.next, %0 br i1 %1, label %if.then57, label %if.else if.then57: ; preds = %if.end54 @@ -810,7 +809,7 @@ if.then71: ; preds = %if.else for.inc: ; preds = %if.end63, %if.else %inc = add i16 %iter.026, 1 - %3 = icmp samesign ult i64 %indvars.iv.next, %zext + %3 = icmp samesign ult i64 %indvars.iv.next, %0 br i1 %3, label %for.body, label %err, !llvm.loop !17 err: ; preds = %for.inc, %if.then71, %if.then62, %if.then53, %if.then47, %if.then41, %if.then27, %if.then20, %if.then15, %if.then10 diff --git a/bench/postgres/optimized/zic.ll b/bench/postgres/optimized/zic.ll index 6d48e9caaa2..a903fc5b143 100644 --- a/bench/postgres/optimized/zic.ll +++ b/bench/postgres/optimized/zic.ll @@ -3671,22 +3671,25 @@ thread-pre-split.i.i.i: ; preds = %.critedge2.i.i.i .lr.ph44.i.i.i: ; preds = %1572, %.lr.ph44.i.preheader.i.i %indvars.iv.i.i = phi i64 [ %1564, %.lr.ph44.i.preheader.i.i ], [ %indvars.iv.next.i.i, %1572 ] %indvars.iv.next.i.i = add nsw i64 %indvars.iv.i.i, -1 - %1565 = trunc i64 %indvars.iv.next.i.i to i32 + %1565 = trunc nsw i64 %indvars.iv.next.i.i to i32 %1566 = add i32 %1553, %1565 %1567 = sext i32 %1566 to i64 %1568 = getelementptr [50 x i64], ptr @trans, i64 0, i64 %1567 %1569 = load i64, ptr %1568, align 8, !noalias !31 %1570 = icmp slt i64 %1554, %1569 - %1571 = trunc i64 %indvars.iv.i.i to i32 - br i1 %1570, label %1572, label %limitrange.exit.i.i + br i1 %1570, label %1571, label %limitrange.exit.i.i 1572: ; preds = %.lr.ph44.i.i.i - %1573 = icmp samesign ugt i32 %1571, 1 + %1573 = icmp samesign ugt i64 %indvars.iv.i.i, 1 br i1 %1573, label %.lr.ph44.i.i.i, label %limitrange.exit.i.i, !llvm.loop !37 -limitrange.exit.i.i: ; preds = %1572, %.lr.ph44.i.i.i, %.critedge4.i.i.i, %.critedge2.i.i.i - %.sroa.12.1.i.i = phi i64 [ %.pr.i.i.i, %.critedge2.i.i.i ], [ %.sroa.12.0.i.i, %.critedge4.i.i.i ], [ %.sroa.12.0.i.i, %.lr.ph44.i.i.i ], [ %.sroa.12.0.i.i, %1572 ] - %.sroa.22.0.i.i = phi i32 [ %.pr15.i.i.i, %.critedge2.i.i.i ], [ %.pr15.i.i.i, %.critedge4.i.i.i ], [ 0, %1572 ], [ %1571, %.lr.ph44.i.i.i ] +limitrange.exit.i.i: ; preds = %.lr.ph44.i.i.i + %.sroa.12.1.i.i = trunc nsw i64 %indvars.iv.i.i to i32 + br label %limitrange.exit.i.i + +limitrange.exit.i.i: ; preds = %1571, %limitrange.exit.loopexit.split.loop.exit863.i.i, %.critedge4.i.i.i, %.critedge2.i.i.i + %.sroa.12.1.i.i = phi i64 [ %.pr.i.i.i, %.critedge2.i.i.i ], [ %.sroa.12.0.i.i, %.critedge4.i.i.i ], [ %.sroa.12.0.i.i, %limitrange.exit.loopexit.split.loop.exit863.i.i ], [ %.sroa.12.0.i.i, %1571 ] + %.sroa.22.0.i.i = phi i32 [ %.pr15.i.i.i, %.critedge2.i.i.i ], [ %.pr15.i.i.i, %.critedge4.i.i.i ], [ %1573, %limitrange.exit.loopexit.split.loop.exit863.i.i ], [ 0, %1571 ] %1574 = icmp sgt i64 %.sroa.12.1.i.i, 0 br i1 %1574, label %.lr.ph.preheader.i483.i.i, label %.critedge.i462.i.i @@ -3782,24 +3785,27 @@ limitrange.exit.i.i: ; preds = %1572, %.lr.ph44.i.i %1614 = zext nneg i32 %.pr15.i468.i.i to i64 br label %.lr.ph44.i473.i.i -.lr.ph44.i473.i.i: ; preds = %1622, %.lr.ph44.i473.preheader.i.i - %indvars.iv800.i.i = phi i64 [ %1614, %.lr.ph44.i473.preheader.i.i ], [ %indvars.iv.next801.i.i, %1622 ] +.lr.ph44.i473.i.i: ; preds = %1621, %.lr.ph44.i473.preheader.i.i + %indvars.iv800.i.i = phi i64 [ %1614, %.lr.ph44.i473.preheader.i.i ], [ %indvars.iv.next801.i.i, %1621 ] %indvars.iv.next801.i.i = add nsw i64 %indvars.iv800.i.i, -1 - %1615 = trunc i64 %indvars.iv.next801.i.i to i32 + %1615 = trunc nsw i64 %indvars.iv.next801.i.i to i32 %1616 = add i32 %1604, %1615 %1617 = sext i32 %1616 to i64 %1618 = getelementptr [50 x i64], ptr @trans, i64 0, i64 %1617 %1619 = load i64, ptr %1618, align 8, !noalias !38 %1620 = icmp sgt i64 %1619, 2147483648 - %1621 = trunc i64 %indvars.iv800.i.i to i32 - br i1 %1620, label %1622, label %limitrange.exit485.i.i + br i1 %1620, label %1621, label %limitrange.exit485.loopexit.split.loop.exit868.i.i + +1621: ; preds = %.lr.ph44.i473.i.i + %1622 = icmp samesign ugt i64 %indvars.iv800.i.i, 1 + br i1 %1622, label %.lr.ph44.i473.i.i, label %limitrange.exit485.i.i, !llvm.loop !37 1622: ; preds = %.lr.ph44.i473.i.i - %1623 = icmp samesign ugt i32 %1621, 1 - br i1 %1623, label %.lr.ph44.i473.i.i, label %limitrange.exit485.i.i, !llvm.loop !37 + %1623 = trunc nsw i64 %indvars.iv800.i.i to i32 + br label %limitrange.exit485.i.i -limitrange.exit485.i.i: ; preds = %1622, %.lr.ph44.i473.i.i, %.critedge4.i470.i.i - %.sroa.22622.0.i.i = phi i32 [ %.pr15.i468.i.i, %.critedge4.i470.i.i ], [ 0, %1622 ], [ %1621, %.lr.ph44.i473.i.i ] +limitrange.exit485.i.i: ; preds = %1621, %1622, %.critedge4.i470.i.i + %.sroa.22622.0.i.i = phi i32 [ %.pr15.i468.i.i, %.critedge4.i470.i.i ], [ %1623, %1622 ], [ 0, %1621 ] %1624 = call i32 @remove(ptr noundef %1387) #26 %1625 = icmp eq i32 %1624, 0 br i1 %1625, label %1635, label %1626 diff --git a/bench/qemu/optimized/block_qcow2-cluster.ll b/bench/qemu/optimized/block_qcow2-cluster.ll index e0e3bc8b481..8709b49730f 100644 --- a/bench/qemu/optimized/block_qcow2-cluster.ll +++ b/bench/qemu/optimized/block_qcow2-cluster.ll @@ -954,7 +954,7 @@ if.end32.i: ; preds = %if.then23.i, %if.el %55 = load i32, ptr %subclusters_per_cluster.i, align 4 %cmp35.i = icmp uge i32 %.pn.i, %55 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %56 = icmp ult i64 %indvars.iv.next, %shr.i118 + %56 = icmp samesign ult i64 %indvars.iv.next, %shr.i118 %or.cond46.i = select i1 %cmp35.i, i1 %56, i1 false br i1 %or.cond46.i, label %for.body.i, label %count_contiguous_subclusters.exit, !llvm.loop !11 diff --git a/bench/raylib/optimized/raudio.ll b/bench/raylib/optimized/raudio.ll index d450f104488..0286bae3b1e 100644 --- a/bench/raylib/optimized/raudio.ll +++ b/bench/raylib/optimized/raudio.ll @@ -80925,11 +80925,7 @@ drmp3_L12_apply_scf_384.exit: ; preds = %2193, %2170 define internal fastcc void @drmp3d_synth_granule(ptr noundef captures(none) %0, ptr noundef nonnull captures(none) %1, i32 noundef range(i32 12, 19) %2, i32 noundef %3, ptr noundef nonnull writeonly captures(none) %4, ptr noundef nonnull captures(none) %5) unnamed_addr #52 { %7 = alloca [4 x [8 x <4 x float>]], align 16 %8 = icmp sgt i32 %3, 0 - br i1 %8, label %.lr.ph, label %.._crit_edge_crit_edge - -.._crit_edge_crit_edge: ; preds = %6 - %.pre = zext nneg i32 %2 to i64 - br label %._crit_edge + br i1 %8, label %.lr.ph, label %._crit_edge .lr.ph: ; preds = %6 %9 = add nsw i32 %2, -3 @@ -81200,8 +81196,7 @@ drmp3d_DCT_II.exit: ; preds = %189 %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %._crit_edge, label %19 -._crit_edge: ; preds = %drmp3d_DCT_II.exit, %.._crit_edge_crit_edge - %.pre-phi = phi i64 [ %.pre, %.._crit_edge_crit_edge ], [ %18, %drmp3d_DCT_II.exit ] +._crit_edge: ; preds = %drmp3d_DCT_II.exit, %6 tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(3840) %5, ptr noundef nonnull align 4 dereferenceable(3840) %0, i64 3840, i1 false) %191 = shl nsw i32 %3, 5 %192 = add nsw i32 %3, -1 @@ -81210,7 +81205,8 @@ drmp3d_DCT_II.exit: ; preds = %189 %195 = sext i32 %192 to i64 %196 = sext i32 %191 to i64 %197 = sext i32 %3 to i64 - br label %198 + %198 = zext nneg i32 %2 to i64 + br label %199 198: ; preds = %._crit_edge, %drmp3d_synth.exit %indvars.iv39 = phi i64 [ 0, %._crit_edge ], [ %indvars.iv.next40, %drmp3d_synth.exit ] @@ -81512,7 +81508,7 @@ drmp3d_DCT_II.exit: ; preds = %189 drmp3d_synth.exit: ; preds = %234 %indvars.iv.next40 = add nuw nsw i64 %indvars.iv39, 2 - %456 = icmp samesign ult i64 %indvars.iv.next40, %.pre-phi + %456 = icmp samesign ult i64 %indvars.iv.next40, %198 br i1 %456, label %198, label %457 457: ; preds = %drmp3d_synth.exit diff --git a/scripts/setup_pre_commit_patch.sh b/scripts/setup_pre_commit_patch.sh index c5409e09ef4..dfe24d5adca 100755 --- a/scripts/setup_pre_commit_patch.sh +++ b/scripts/setup_pre_commit_patch.sh @@ -2,7 +2,7 @@ set -euo pipefail shopt -s inherit_errexit -export GITHUB_PATCH_ID="/llvm-project/commit/" +export GITHUB_PATCH_ID=llvm/llvm-project/pull/125764 export COMPTIME_MODE=0 # Please rebase manually