Skip to content

Commit 3bdd397

Browse files
authored
[VectorCombine] Relax vector type constraint on bitop(bitcast, bitcast) (#157245)
Inspired by llvm/llvm-project#157131. This patch allows `bitop(bitcast, bitcast) -> bitcast(bitop)` for scalar integer types.
1 parent 2308d7b commit 3bdd397

File tree

2 files changed

+80
-15
lines changed

2 files changed

+80
-15
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -870,38 +870,39 @@ bool VectorCombine::foldBitOpOfCastops(Instruction &I) {
870870
if (LHSSrc->getType() != RHSSrc->getType())
871871
return false;
872872

873-
// Only handle vector types with integer elements
874-
auto *SrcVecTy = dyn_cast<FixedVectorType>(LHSSrc->getType());
875-
auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
876-
if (!SrcVecTy || !DstVecTy)
873+
auto *SrcTy = LHSSrc->getType();
874+
auto *DstTy = I.getType();
875+
// Bitcasts can handle scalar/vector mixes, such as i16 -> <16 x i1>.
876+
// Other casts only handle vector types with integer elements.
877+
if (CastOpcode != Instruction::BitCast &&
878+
(!isa<FixedVectorType>(SrcTy) || !isa<FixedVectorType>(DstTy)))
877879
return false;
878880

879-
if (!SrcVecTy->getScalarType()->isIntegerTy() ||
880-
!DstVecTy->getScalarType()->isIntegerTy())
881+
// Only integer scalar/vector values are legal for bitwise logic operations.
882+
if (!SrcTy->getScalarType()->isIntegerTy() ||
883+
!DstTy->getScalarType()->isIntegerTy())
881884
return false;
882885

883886
// Cost Check :
884887
// OldCost = bitlogic + 2*casts
885888
// NewCost = bitlogic + cast
886889

887890
// Calculate specific costs for each cast with instruction context
888-
InstructionCost LHSCastCost =
889-
TTI.getCastInstrCost(CastOpcode, DstVecTy, SrcVecTy,
890-
TTI::CastContextHint::None, CostKind, LHSCast);
891-
InstructionCost RHSCastCost =
892-
TTI.getCastInstrCost(CastOpcode, DstVecTy, SrcVecTy,
893-
TTI::CastContextHint::None, CostKind, RHSCast);
891+
InstructionCost LHSCastCost = TTI.getCastInstrCost(
892+
CastOpcode, DstTy, SrcTy, TTI::CastContextHint::None, CostKind, LHSCast);
893+
InstructionCost RHSCastCost = TTI.getCastInstrCost(
894+
CastOpcode, DstTy, SrcTy, TTI::CastContextHint::None, CostKind, RHSCast);
894895

895896
InstructionCost OldCost =
896-
TTI.getArithmeticInstrCost(BinOp->getOpcode(), DstVecTy, CostKind) +
897+
TTI.getArithmeticInstrCost(BinOp->getOpcode(), DstTy, CostKind) +
897898
LHSCastCost + RHSCastCost;
898899

899900
// For new cost, we can't provide an instruction (it doesn't exist yet)
900901
InstructionCost GenericCastCost = TTI.getCastInstrCost(
901-
CastOpcode, DstVecTy, SrcVecTy, TTI::CastContextHint::None, CostKind);
902+
CastOpcode, DstTy, SrcTy, TTI::CastContextHint::None, CostKind);
902903

903904
InstructionCost NewCost =
904-
TTI.getArithmeticInstrCost(BinOp->getOpcode(), SrcVecTy, CostKind) +
905+
TTI.getArithmeticInstrCost(BinOp->getOpcode(), SrcTy, CostKind) +
905906
GenericCastCost;
906907

907908
// Account for multi-use casts using specific costs

llvm/test/Transforms/VectorCombine/X86/bitop-of-castops.ll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,19 @@ define <2 x i16> @and_bitcast_f32_to_v2i16_constant(float %a) {
433433
ret <2 x i16> %and
434434
}
435435

436+
define <2 x i16> @and_bitcast_f32_to_v2i16(float %a, float %b) {
437+
; CHECK-LABEL: @and_bitcast_f32_to_v2i16(
438+
; CHECK-NEXT: [[BC1:%.*]] = bitcast float [[A:%.*]] to <2 x i16>
439+
; CHECK-NEXT: [[BC2:%.*]] = bitcast float [[B:%.*]] to <2 x i16>
440+
; CHECK-NEXT: [[AND:%.*]] = and <2 x i16> [[BC1]], [[BC2]]
441+
; CHECK-NEXT: ret <2 x i16> [[AND]]
442+
;
443+
%bc1 = bitcast float %a to <2 x i16>
444+
%bc2 = bitcast float %b to <2 x i16>
445+
%and = and <2 x i16> %bc1, %bc2
446+
ret <2 x i16> %and
447+
}
448+
436449
; Negative test: bitcast from vector float to scalar int (optimization should not apply)
437450
define i64 @and_bitcast_v2f32_to_i64_constant(<2 x float> %a) {
438451
; CHECK-LABEL: @and_bitcast_v2f32_to_i64_constant(
@@ -445,6 +458,19 @@ define i64 @and_bitcast_v2f32_to_i64_constant(<2 x float> %a) {
445458
ret i64 %and
446459
}
447460

461+
define i64 @and_bitcast_v2f32_to_i64(<2 x float> %a, <2 x float> %b) {
462+
; CHECK-LABEL: @and_bitcast_v2f32_to_i64(
463+
; CHECK-NEXT: [[BC1:%.*]] = bitcast <2 x float> [[A:%.*]] to i64
464+
; CHECK-NEXT: [[BC2:%.*]] = bitcast <2 x float> [[B:%.*]] to i64
465+
; CHECK-NEXT: [[AND:%.*]] = and i64 [[BC1]], [[BC2]]
466+
; CHECK-NEXT: ret i64 [[AND]]
467+
;
468+
%bc1 = bitcast <2 x float> %a to i64
469+
%bc2 = bitcast <2 x float> %b to i64
470+
%and = and i64 %bc1, %bc2
471+
ret i64 %and
472+
}
473+
448474
; Test no-op bitcast
449475
define i16 @xor_bitcast_i16_to_i16_constant(i16 %a) {
450476
; CHECK-LABEL: @xor_bitcast_i16_to_i16_constant(
@@ -457,6 +483,19 @@ define i16 @xor_bitcast_i16_to_i16_constant(i16 %a) {
457483
ret i16 %or
458484
}
459485

486+
define i16 @xor_bitcast_i16_to_i16(i16 %a, i16 %b) {
487+
; CHECK-LABEL: @xor_bitcast_i16_to_i16(
488+
; CHECK-NEXT: [[BC1:%.*]] = bitcast i16 [[A:%.*]] to i16
489+
; CHECK-NEXT: [[BC2:%.*]] = bitcast i16 [[B:%.*]] to i16
490+
; CHECK-NEXT: [[OR:%.*]] = xor i16 [[BC1]], [[BC2]]
491+
; CHECK-NEXT: ret i16 [[OR]]
492+
;
493+
%bc1 = bitcast i16 %a to i16
494+
%bc2 = bitcast i16 %b to i16
495+
%or = xor i16 %bc1, %bc2
496+
ret i16 %or
497+
}
498+
460499
; Test bitwise operations with integer to integer vector bitcast
461500
define <16 x i1> @xor_bitcast_i16_to_v16i1_constant(i16 %a) {
462501
; CHECK-LABEL: @xor_bitcast_i16_to_v16i1_constant(
@@ -469,6 +508,18 @@ define <16 x i1> @xor_bitcast_i16_to_v16i1_constant(i16 %a) {
469508
ret <16 x i1> %or
470509
}
471510

511+
define <16 x i1> @xor_bitcast_i16_to_v16i1(i16 %a, i16 %b) {
512+
; CHECK-LABEL: @xor_bitcast_i16_to_v16i1(
513+
; CHECK-NEXT: [[B1:%.*]] = xor i16 [[A:%.*]], [[B:%.*]]
514+
; CHECK-NEXT: [[BC3:%.*]] = bitcast i16 [[B1]] to <16 x i1>
515+
; CHECK-NEXT: ret <16 x i1> [[BC3]]
516+
;
517+
%bc1 = bitcast i16 %a to <16 x i1>
518+
%bc2 = bitcast i16 %b to <16 x i1>
519+
%or = xor <16 x i1> %bc1, %bc2
520+
ret <16 x i1> %or
521+
}
522+
472523
; Test bitwise operations with integer vector to integer bitcast
473524
define i16 @or_bitcast_v16i1_to_i16_constant(<16 x i1> %a) {
474525
; CHECK-LABEL: @or_bitcast_v16i1_to_i16_constant(
@@ -480,3 +531,16 @@ define i16 @or_bitcast_v16i1_to_i16_constant(<16 x i1> %a) {
480531
%or = or i16 %bc, 3
481532
ret i16 %or
482533
}
534+
535+
define i16 @or_bitcast_v16i1_to_i16(<16 x i1> %a, <16 x i1> %b) {
536+
; CHECK-LABEL: @or_bitcast_v16i1_to_i16(
537+
; CHECK-NEXT: [[BC1:%.*]] = bitcast <16 x i1> [[A:%.*]] to i16
538+
; CHECK-NEXT: [[BC2:%.*]] = bitcast <16 x i1> [[B:%.*]] to i16
539+
; CHECK-NEXT: [[OR:%.*]] = or i16 [[BC1]], [[BC2]]
540+
; CHECK-NEXT: ret i16 [[OR]]
541+
;
542+
%bc1 = bitcast <16 x i1> %a to i16
543+
%bc2 = bitcast <16 x i1> %b to i16
544+
%or = or i16 %bc1, %bc2
545+
ret i16 %or
546+
}

0 commit comments

Comments
 (0)