diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index f1b225c0f238a..3c0836da3c343 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1846,34 +1846,45 @@ Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
   if (CastOpcode != Cast1->getOpcode())
     return nullptr;
 
-  // If the source types do not match, but the casts are matching extends, we
-  // can still narrow the logic op.
-  if (SrcTy != Cast1->getSrcTy()) {
-    Value *X, *Y;
-    if (match(Cast0, m_OneUse(m_ZExtOrSExt(m_Value(X)))) &&
-        match(Cast1, m_OneUse(m_ZExtOrSExt(m_Value(Y))))) {
-      // Cast the narrower source to the wider source type.
-      unsigned XNumBits = X->getType()->getScalarSizeInBits();
-      unsigned YNumBits = Y->getType()->getScalarSizeInBits();
-      if (XNumBits < YNumBits)
+  // Can't fold it profitably if neither of the casts has one use.
+  if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
+    return nullptr;
+
+  Value *X, *Y;
+  if (match(Cast0, m_ZExtOrSExt(m_Value(X))) &&
+      match(Cast1, m_ZExtOrSExt(m_Value(Y)))) {
+    // Cast the narrower source to the wider source type.
+    unsigned XNumBits = X->getType()->getScalarSizeInBits();
+    unsigned YNumBits = Y->getType()->getScalarSizeInBits();
+    if (XNumBits != YNumBits) {
+      // Cast the narrower source to the wider source type only if both casts
+      // have one use, to avoid creating an extra instruction.
+      if (!Cast0->hasOneUse() || !Cast1->hasOneUse())
+        return nullptr;
+
+      // If the source types do not match, but the casts are matching extends,
+      // we can still narrow the logic op.
+      if (XNumBits < YNumBits) {
         X = Builder.CreateCast(CastOpcode, X, Y->getType());
-      else
+      } else {
         Y = Builder.CreateCast(CastOpcode, Y, X->getType());
-      // Do the logic op in the intermediate width, then widen more.
-      Value *NarrowLogic = Builder.CreateBinOp(LogicOpc, X, Y);
-      return CastInst::Create(CastOpcode, NarrowLogic, DestTy);
+      }
     }
 
-    // Give up for other cast opcodes.
-    return nullptr;
+    // Do the logic op in the intermediate width, then widen more.
+    Value *NarrowLogic = Builder.CreateBinOp(LogicOpc, X, Y, I.getName());
+    return CastInst::Create(CastOpcode, NarrowLogic, DestTy);
   }
 
+  // If the source types of the casts differ, give up for other cast opcodes.
+  if (SrcTy != Cast1->getSrcTy())
+    return nullptr;
+
   Value *Cast0Src = Cast0->getOperand(0);
   Value *Cast1Src = Cast1->getOperand(0);
 
   // fold logic(cast(A), cast(B)) -> cast(logic(A, B))
-  if ((Cast0->hasOneUse() || Cast1->hasOneUse()) &&
-      shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) {
+  if (shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) {
     Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src,
                                        I.getName());
     return CastInst::Create(CastOpcode, NewOp, DestTy);
diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll
index 5a58995f6c315..74d1f2119a2ad 100644
--- a/llvm/test/Transforms/InstCombine/and-xor-or.ll
+++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll
@@ -2,6 +2,8 @@
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 declare void @use(i32)
+declare void @use_i16(i16)
+declare void @use_2xi16(<2 x i16>)
 declare void @use_i8(i8)
 declare void @use_i1(i1)
 
@@ -4250,8 +4252,8 @@ define i16 @and_zext_zext(i8 %x, i4 %y) {
 ; CHECK-LABEL: define {{[^@]+}}@and_zext_zext
 ; CHECK-SAME: (i8 [[X:%.*]], i4 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i4 [[Y]] to i8
-; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[X]], [[TMP1]]
-; CHECK-NEXT:    [[R:%.*]] = zext nneg i8 [[TMP2]] to i16
+; CHECK-NEXT:    [[R1:%.*]] = and i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = zext nneg i8 [[R1]] to i16
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %zx = zext i8 %x to i16
@@ -4260,12 +4262,41 @@ define i16 @and_zext_zext(i8 %x, i4 %y) {
   ret i16 %r
 }
 
+define i16 @and_zext_zext_2(i8 %x, i8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@and_zext_zext_2
+; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[R1:%.*]] = and i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = zext i8 [[R1]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %zx = zext i8 %x to i16
+  %zy = zext i8 %y to i16
+  %r = and i16 %zx, %zy
+  ret i16 %r
+}
+
+define i16 @and_zext_zext_2_use1(i8 %x, i8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@and_zext_zext_2_use1
+; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[ZX:%.*]] = zext i8 [[X]] to i16
+; CHECK-NEXT:    call void @use_i16(i16 [[ZX]])
+; CHECK-NEXT:    [[R1:%.*]] = and i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = zext i8 [[R1]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %zx = zext i8 %x to i16
+  call void @use_i16(i16 %zx)
+  %zy = zext i8 %y to i16
+  %r = and i16 %zx, %zy
+  ret i16 %r
+}
+
 define i16 @or_zext_zext(i8 %x, i4 %y) {
 ; CHECK-LABEL: define {{[^@]+}}@or_zext_zext
 ; CHECK-SAME: (i8 [[X:%.*]], i4 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i4 [[Y]] to i8
-; CHECK-NEXT:    [[TMP2:%.*]] = or i8 [[X]], [[TMP1]]
-; CHECK-NEXT:    [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT:    [[R1:%.*]] = or i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = zext i8 [[R1]] to i16
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %zx = zext i8 %x to i16
@@ -4274,12 +4305,41 @@ define i16 @or_zext_zext(i8 %x, i4 %y) {
   ret i16 %r
 }
 
+define i16 @or_zext_zext_2(i8 %x, i8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@or_zext_zext_2
+; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[R1:%.*]] = or i8 [[Y]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext i8 [[R1]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %zx = zext i8 %x to i16
+  %zy = zext i8 %y to i16
+  %r = or i16 %zy, %zx
+  ret i16 %r
+}
+
+define i16 @or_zext_zext_2_use1(i8 %x, i8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@or_zext_zext_2_use1
+; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[ZX:%.*]] = zext i8 [[X]] to i16
+; CHECK-NEXT:    call void @use_i16(i16 [[ZX]])
+; CHECK-NEXT:    [[R1:%.*]] = or i8 [[Y]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext i8 [[R1]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %zx = zext i8 %x to i16
+  call void @use_i16(i16 %zx)
+  %zy = zext i8 %y to i16
+  %r = or i16 %zy, %zx
+  ret i16 %r
+}
+
 define <2 x i16> @xor_zext_zext(<2 x i8> %x, <2 x i4> %y) {
 ; CHECK-LABEL: define {{[^@]+}}@xor_zext_zext
 ; CHECK-SAME: (<2 x i8> [[X:%.*]], <2 x i4> [[Y:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i4> [[Y]] to <2 x i8>
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i8> [[X]], [[TMP1]]
-; CHECK-NEXT:    [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT:    [[R1:%.*]] = xor <2 x i8> [[X]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = zext <2 x i8> [[R1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[R]]
 ;
   %zx = zext <2 x i8> %x to <2 x i16>
@@ -4288,12 +4348,41 @@ define <2 x i16> @xor_zext_zext(<2 x i8> %x, <2 x i4> %y) {
   ret <2 x i16> %r
 }
 
+define <2 x i16> @xor_zext_zext_2(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: define {{[^@]+}}@xor_zext_zext_2
+; CHECK-SAME: (<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) {
+; CHECK-NEXT:    [[R1:%.*]] = xor <2 x i8> [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = zext <2 x i8> [[R1]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+;
+  %zx = zext <2 x i8> %x to <2 x i16>
+  %zy = zext <2 x i8> %y to <2 x i16>
+  %r = xor <2 x i16> %zx, %zy
+  ret <2 x i16> %r
+}
+
+define <2 x i16> @xor_zext_zext_2_use1(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: define {{[^@]+}}@xor_zext_zext_2_use1
+; CHECK-SAME: (<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) {
+; CHECK-NEXT:    [[ZX:%.*]] = zext <2 x i8> [[X]] to <2 x i16>
+; CHECK-NEXT:    call void @use_2xi16(<2 x i16> [[ZX]])
+; CHECK-NEXT:    [[R1:%.*]] = xor <2 x i8> [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = zext <2 x i8> [[R1]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+;
+  %zx = zext <2 x i8> %x to <2 x i16>
+  call void @use_2xi16(<2 x i16> %zx)
+  %zy = zext <2 x i8> %y to <2 x i16>
+  %r = xor <2 x i16> %zx, %zy
+  ret <2 x i16> %r
+}
+
 define i16 @and_sext_sext(i8 %x, i4 %y) {
 ; CHECK-LABEL: define {{[^@]+}}@and_sext_sext
 ; CHECK-SAME: (i8 [[X:%.*]], i4 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i4 [[Y]] to i8
-; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[X]], [[TMP1]]
-; CHECK-NEXT:    [[R:%.*]] = sext i8 [[TMP2]] to i16
+; CHECK-NEXT:    [[R1:%.*]] = and i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = sext i8 [[R1]] to i16
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %sx = sext i8 %x to i16
@@ -4302,12 +4391,41 @@ define i16 @and_sext_sext(i8 %x, i4 %y) {
   ret i16 %r
 }
 
+define i16 @and_sext_sext_2(i8 %x, i8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@and_sext_sext_2
+; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[R1:%.*]] = and i8 [[Y]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = sext i8 [[R1]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %sx = sext i8 %x to i16
+  %sy = sext i8 %y to i16
+  %r = and i16 %sy, %sx
+  ret i16 %r
+}
+
+define i16 @and_sext_sext_2_use1(i8 %x, i8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@and_sext_sext_2_use1
+; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[SX:%.*]] = sext i8 [[X]] to i16
+; CHECK-NEXT:    call void @use_i16(i16 [[SX]])
+; CHECK-NEXT:    [[R1:%.*]] = and i8 [[Y]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = sext i8 [[R1]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %sx = sext i8 %x to i16
+  call void @use_i16(i16 %sx)
+  %sy = sext i8 %y to i16
+  %r = and i16 %sy, %sx
+  ret i16 %r
+}
+
 define i16 @or_sext_sext(i8 %x, i4 %y) {
 ; CHECK-LABEL: define {{[^@]+}}@or_sext_sext
 ; CHECK-SAME: (i8 [[X:%.*]], i4 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i4 [[Y]] to i8
-; CHECK-NEXT:    [[TMP2:%.*]] = or i8 [[X]], [[TMP1]]
-; CHECK-NEXT:    [[R:%.*]] = sext i8 [[TMP2]] to i16
+; CHECK-NEXT:    [[R1:%.*]] = or i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = sext i8 [[R1]] to i16
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %sx = sext i8 %x to i16
@@ -4316,12 +4434,41 @@ define i16 @or_sext_sext(i8 %x, i4 %y) {
   ret i16 %r
 }
 
+define i16 @or_sext_sext_2(i8 %x, i8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@or_sext_sext_2
+; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[R1:%.*]] = or i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = sext i8 [[R1]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %sx = sext i8 %x to i16
+  %sy = sext i8 %y to i16
+  %r = or i16 %sx, %sy
+  ret i16 %r
+}
+
+define i16 @or_sext_sext_2_use1(i8 %x, i8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@or_sext_sext_2_use1
+; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[SX:%.*]] = sext i8 [[X]] to i16
+; CHECK-NEXT:    call void @use_i16(i16 [[SX]])
+; CHECK-NEXT:    [[R1:%.*]] = or i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = sext i8 [[R1]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %sx = sext i8 %x to i16
+  call void @use_i16(i16 %sx)
+  %sy = sext i8 %y to i16
+  %r = or i16 %sx, %sy
+  ret i16 %r
+}
+
 define i16 @xor_sext_sext(i8 %x, i4 %y) {
 ; CHECK-LABEL: define {{[^@]+}}@xor_sext_sext
 ; CHECK-SAME: (i8 [[X:%.*]], i4 [[Y:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext i4 [[Y]] to i8
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[X]], [[TMP1]]
-; CHECK-NEXT:    [[R:%.*]] = sext i8 [[TMP2]] to i16
+; CHECK-NEXT:    [[R1:%.*]] = xor i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = sext i8 [[R1]] to i16
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %sx = sext i8 %x to i16
@@ -4330,6 +4477,35 @@ define i16 @xor_sext_sext(i8 %x, i4 %y) {
   ret i16 %r
 }
 
+define i16 @xor_sext_sext_2(i8 %x, i8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@xor_sext_sext_2
+; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[R1:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = sext i8 [[R1]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %sx = sext i8 %x to i16
+  %sy = sext i8 %y to i16
+  %r = xor i16 %sx, %sy
+  ret i16 %r
+}
+
+define i16 @xor_sext_sext_2_use1(i8 %x, i8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@xor_sext_sext_2_use1
+; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[SX:%.*]] = sext i8 [[X]] to i16
+; CHECK-NEXT:    call void @use_i16(i16 [[SX]])
+; CHECK-NEXT:    [[R1:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = sext i8 [[R1]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %sx = sext i8 %x to i16
+  call void @use_i16(i16 %sx)
+  %sy = sext i8 %y to i16
+  %r = xor i16 %sx, %sy
+  ret i16 %r
+}
+
 ; negative test - mismatched casts
 
 define i16 @and_zext_sext(i8 %x, i4 %y) {
@@ -4364,6 +4540,24 @@ define i32 @and_zext_zext_use1(i8 %x, i4 %y) {
   ret i32 %r
 }
 
+define i32 @and_zext_zext_use2(i8 %x, i8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@and_zext_zext_use2
+; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[ZX:%.*]] = zext i8 [[X]] to i32
+; CHECK-NEXT:    call void @use(i32 [[ZX]])
+; CHECK-NEXT:    [[ZY:%.*]] = zext i8 [[Y]] to i32
+; CHECK-NEXT:    call void @use(i32 [[ZY]])
+; CHECK-NEXT:    [[R:%.*]] = and i32 [[ZX]], [[ZY]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %zx = zext i8 %x to i32
+  call void @use(i32 %zx)
+  %zy = zext i8 %y to i32
+  call void @use(i32 %zy)
+  %r = and i32 %zx, %zy
+  ret i32 %r
+}
+
 ; negative test - don't create an extra instruction
 
 define i32 @or_sext_sext_use1(i8 %x, i4 %y) {
@@ -4382,6 +4576,24 @@ define i32 @or_sext_sext_use1(i8 %x, i4 %y) {
   ret i32 %r
 }
 
+define i32 @or_sext_sext_use2(i8 %x, i8 %y) {
+; CHECK-LABEL: define {{[^@]+}}@or_sext_sext_use2
+; CHECK-SAME: (i8 [[X:%.*]], i8 [[Y:%.*]]) {
+; CHECK-NEXT:    [[SX:%.*]] = sext i8 [[X]] to i32
+; CHECK-NEXT:    call void @use(i32 [[SX]])
+; CHECK-NEXT:    [[SY:%.*]] = sext i8 [[Y]] to i32
+; CHECK-NEXT:    call void @use(i32 [[SY]])
+; CHECK-NEXT:    [[R:%.*]] = or i32 [[SX]], [[SY]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %sx = sext i8 %x to i32
+  call void @use(i32 %sx)
+  %sy = sext i8 %y to i32
+  call void @use(i32 %sy)
+  %r = or i32 %sx, %sy
+  ret i32 %r
+}
+
 define i1 @PR56294(i8 %x) {
 ; CHECK-LABEL: define {{[^@]+}}@PR56294
 ; CHECK-SAME: (i8 [[X:%.*]]) {