Skip to content

Commit 1f82553

Browse files
committed
[SLP]Fix mixing xor instructions in the same opcode analysis
An xor with a 0 operand should not be treated as compatible with multiplication-based instructions, only with or/xor/add/sub. Fixes #161140
1 parent 7b96dfb commit 1f82553

File tree

2 files changed

+8
-2
lines changed

2 files changed

+8
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1100,7 +1100,9 @@ class BinOpSameOpcodeHelper {
11001100
// constant + x cannot be -constant - x
11011101
// instead, it should be x - -constant
11021102
if (Pos == 1 ||
1103-
(FromOpcode == Instruction::Add && ToOpcode == Instruction::Sub))
1103+
((FromOpcode == Instruction::Add || FromOpcode == Instruction::Or ||
1104+
FromOpcode == Instruction::Xor) &&
1105+
ToOpcode == Instruction::Sub))
11041106
return SmallVector<Value *>({LHS, RHS});
11051107
return SmallVector<Value *>({RHS, LHS});
11061108
}
@@ -1188,6 +1190,10 @@ class BinOpSameOpcodeHelper {
11881190
if (CIValue.isAllOnes())
11891191
InterchangeableMask = CanBeAll;
11901192
break;
1193+
case Instruction::Xor:
1194+
if (CIValue.isZero())
1195+
InterchangeableMask = XorBIT | OrBIT | AndBIT | SubBIT | AddBIT;
1196+
break;
11911197
default:
11921198
if (CIValue.isZero())
11931199
InterchangeableMask = CanBeAll;

llvm/test/Transforms/SLPVectorizer/X86/xor-combined-opcode.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ define i1 @foo(i1 %v) { ; assume %v is 1
66
; CHECK-NEXT: [[ENTRY:.*:]]
77
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> poison, i1 [[V]], i32 0
88
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[TMP0]], <2 x i1> poison, <2 x i32> zeroinitializer
9-
; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i1> <i1 false, i1 true>, [[TMP1]]
9+
; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> zeroinitializer, [[TMP1]]
1010
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
1111
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
1212
; CHECK-NEXT: [[SUB:%.*]] = sub i1 [[TMP3]], [[TMP4]]

0 commit comments

Comments
 (0)