Skip to content

Commit 8f21f9c

Browse files
committed
Revert "Reimplement (part of) the or -> add optimization. Matching 'or' into
'add'", which seems to have broken just about everything. llvm-svn: 116033
1 parent 5b2a411 commit 8f21f9c

File tree

4 files changed

+65
-126
lines changed

4 files changed

+65
-126
lines changed

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 21 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -997,63 +997,6 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
997997
def : Pat<(i64 (zext def32:$src)),
998998
(SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
999999

1000-
//===----------------------------------------------------------------------===//
1001-
// Pattern match OR as ADD
1002-
//===----------------------------------------------------------------------===//
1003-
1004-
// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
1005-
// 3-addressified into an LEA instruction to avoid copies. However, we also
1006-
// want to finally emit these instructions as an or at the end of the code
1007-
// generator to make the generated code easier to read. To do this, we select
1008-
// into "disjoint bits" pseudo ops.
1009-
1010-
// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
1011-
def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
1012-
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
1013-
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
1014-
1015-
unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
1016-
APInt Mask = APInt::getAllOnesValue(BitWidth);
1017-
APInt KnownZero0, KnownOne0;
1018-
CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
1019-
APInt KnownZero1, KnownOne1;
1020-
CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
1021-
return (~KnownZero0 & ~KnownZero1) == 0;
1022-
}]>;
1023-
1024-
1025-
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
1026-
let AddedComplexity = 5 in { // Try this before the selecting to OR
1027-
1028-
let isCommutable = 1, isConvertibleToThreeAddress = 1,
1029-
Constraints = "$src1 = $dst" in {
1030-
def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
1031-
"", // orw/addw REG, REG
1032-
[(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
1033-
def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
1034-
"", // orl/addl REG, REG
1035-
[(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
1036-
def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
1037-
"", // orq/addq REG, REG
1038-
[(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
1039-
}
1040-
1041-
def : Pat<(or_is_add GR16:$src1, imm:$src2),
1042-
(ADD16ri GR16:$src1, imm:$src2)>;
1043-
def : Pat<(or_is_add GR32:$src1, imm:$src2),
1044-
(ADD32ri GR32:$src1, imm:$src2)>;
1045-
def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
1046-
(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
1047-
1048-
def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
1049-
(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
1050-
def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
1051-
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
1052-
def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
1053-
(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
1054-
} // AddedComplexity
1055-
1056-
10571000
//===----------------------------------------------------------------------===//
10581001
// Some peepholes
10591002
//===----------------------------------------------------------------------===//
@@ -1366,8 +1309,27 @@ def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
13661309
def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
13671310
(SETB_C32r)>;
13681311

1369-
1370-
1312+
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
1313+
let AddedComplexity = 5 in { // Try this before the selecting to OR
1314+
def : Pat<(or_is_add GR16:$src1, imm:$src2),
1315+
(ADD16ri GR16:$src1, imm:$src2)>;
1316+
def : Pat<(or_is_add GR32:$src1, imm:$src2),
1317+
(ADD32ri GR32:$src1, imm:$src2)>;
1318+
def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
1319+
(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
1320+
def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
1321+
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
1322+
def : Pat<(or_is_add GR16:$src1, GR16:$src2),
1323+
(ADD16rr GR16:$src1, GR16:$src2)>;
1324+
def : Pat<(or_is_add GR32:$src1, GR32:$src2),
1325+
(ADD32rr GR32:$src1, GR32:$src2)>;
1326+
def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
1327+
(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
1328+
def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
1329+
(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
1330+
def : Pat<(or_is_add GR64:$src1, GR64:$src2),
1331+
(ADD64rr GR64:$src1, GR64:$src2)>;
1332+
} // AddedComplexity
13711333

13721334
//===----------------------------------------------------------------------===//
13731335
// EFLAGS-defining Patterns

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 30 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,6 @@ ReMatPICStubLoad("remat-pic-stub-load",
5454
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
5555
: TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
5656
TM(tm), RI(tm, *this) {
57-
enum {
58-
TB_NOT_REVERSABLE = 1U << 31,
59-
TB_FLAGS = TB_NOT_REVERSABLE
60-
};
61-
6257
static const unsigned OpTbl2Addr[][2] = {
6358
{ X86::ADC32ri, X86::ADC32mi },
6459
{ X86::ADC32ri8, X86::ADC32mi8 },
@@ -69,15 +64,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
6964
{ X86::ADD16ri, X86::ADD16mi },
7065
{ X86::ADD16ri8, X86::ADD16mi8 },
7166
{ X86::ADD16rr, X86::ADD16mr },
72-
{ X86::ADD16rr_DB, X86::ADD16mr | TB_NOT_REVERSABLE },
7367
{ X86::ADD32ri, X86::ADD32mi },
7468
{ X86::ADD32ri8, X86::ADD32mi8 },
7569
{ X86::ADD32rr, X86::ADD32mr },
76-
{ X86::ADD32rr_DB, X86::ADD32mr | TB_NOT_REVERSABLE },
7770
{ X86::ADD64ri32, X86::ADD64mi32 },
7871
{ X86::ADD64ri8, X86::ADD64mi8 },
7972
{ X86::ADD64rr, X86::ADD64mr },
80-
{ X86::ADD64rr_DB, X86::ADD64mr | TB_NOT_REVERSABLE },
8173
{ X86::ADD8ri, X86::ADD8mi },
8274
{ X86::ADD8rr, X86::ADD8mr },
8375
{ X86::AND16ri, X86::AND16mi },
@@ -222,21 +214,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
222214

223215
for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
224216
unsigned RegOp = OpTbl2Addr[i][0];
225-
unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
226-
assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
227-
RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
228-
229-
// If this is not a reversable operation (because there is a many->one)
230-
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
231-
if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
232-
continue;
233-
217+
unsigned MemOp = OpTbl2Addr[i][1];
218+
if (!RegOp2MemOpTable2Addr.insert(std::make_pair(RegOp,
219+
std::make_pair(MemOp,0))).second)
220+
assert(false && "Duplicated entries?");
234221
// Index 0, folded load and store, no alignment requirement.
235222
unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
236-
237-
assert(!MemOp2RegOpTable.count(MemOp) &&
238-
"Duplicated entries in unfolding maps?");
239-
MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
223+
if (!MemOp2RegOpTable.insert(std::make_pair(MemOp,
224+
std::make_pair(RegOp,
225+
AuxInfo))).second)
226+
assert(false && "Duplicated entries in unfolding maps?");
240227
}
241228

242229
// If the third value is 1, then it's folding either a load or a store.
@@ -466,11 +453,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
466453
{ X86::ADC32rr, X86::ADC32rm, 0 },
467454
{ X86::ADC64rr, X86::ADC64rm, 0 },
468455
{ X86::ADD16rr, X86::ADD16rm, 0 },
469-
{ X86::ADD16rr_DB, X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
470456
{ X86::ADD32rr, X86::ADD32rm, 0 },
471-
{ X86::ADD32rr_DB, X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
472457
{ X86::ADD64rr, X86::ADD64rm, 0 },
473-
{ X86::ADD64rr_DB, X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
474458
{ X86::ADD8rr, X86::ADD8rm, 0 },
475459
{ X86::ADDPDrr, X86::ADDPDrm, 16 },
476460
{ X86::ADDPSrr, X86::ADDPSrm, 16 },
@@ -665,23 +649,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
665649

666650
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
667651
unsigned RegOp = OpTbl2[i][0];
668-
unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
652+
unsigned MemOp = OpTbl2[i][1];
669653
unsigned Align = OpTbl2[i][2];
670-
671-
assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
672-
RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
673-
674-
675-
// If this is not a reversable operation (because there is a many->one)
676-
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
677-
if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
678-
continue;
679-
654+
if (!RegOp2MemOpTable2.insert(std::make_pair(RegOp,
655+
std::make_pair(MemOp,Align))).second)
656+
assert(false && "Duplicated entries?");
680657
// Index 2, folded load
681658
unsigned AuxInfo = 2 | (1 << 4);
682-
assert(!MemOp2RegOpTable.count(MemOp) &&
683-
"Duplicated entries in unfolding maps?");
684-
MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
659+
if (!MemOp2RegOpTable.insert(std::make_pair(MemOp,
660+
std::make_pair(RegOp, AuxInfo))).second)
661+
assert(false && "Duplicated entries in unfolding maps?");
685662
}
686663
}
687664

@@ -1156,8 +1133,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
11561133
case X86::ADD16ri8:
11571134
addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
11581135
break;
1159-
case X86::ADD16rr:
1160-
case X86::ADD16rr_DB: {
1136+
case X86::ADD16rr: {
11611137
unsigned Src2 = MI->getOperand(2).getReg();
11621138
bool isKill2 = MI->getOperand(2).isKill();
11631139
unsigned leaInReg2 = 0;
@@ -1370,27 +1346,18 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
13701346
Src, isKill, -1);
13711347
break;
13721348
case X86::ADD64rr:
1373-
case X86::ADD64rr_DB:
1374-
case X86::ADD32rr:
1375-
case X86::ADD32rr_DB: {
1349+
case X86::ADD32rr: {
13761350
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
1377-
unsigned Opc;
1378-
TargetRegisterClass *RC;
1379-
if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
1380-
Opc = X86::LEA64r;
1381-
RC = X86::GR64_NOSPRegisterClass;
1382-
} else {
1383-
Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
1384-
RC = X86::GR32_NOSPRegisterClass;
1385-
}
1386-
1387-
1351+
unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
1352+
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
13881353
unsigned Src2 = MI->getOperand(2).getReg();
13891354
bool isKill2 = MI->getOperand(2).isKill();
13901355

13911356
// LEA can't handle RSP.
13921357
if (TargetRegisterInfo::isVirtualRegister(Src2) &&
1393-
!MF.getRegInfo().constrainRegClass(Src2, RC))
1358+
!MF.getRegInfo().constrainRegClass(Src2,
1359+
MIOpc == X86::ADD64rr ? X86::GR64_NOSPRegisterClass :
1360+
X86::GR32_NOSPRegisterClass))
13941361
return 0;
13951362

13961363
NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
@@ -1401,8 +1368,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
14011368
LV->replaceKillInstruction(Src2, MI, NewMI);
14021369
break;
14031370
}
1404-
case X86::ADD16rr:
1405-
case X86::ADD16rr_DB: {
1371+
case X86::ADD16rr: {
14061372
if (DisableLEA16)
14071373
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
14081374
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
@@ -2630,8 +2596,13 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
26302596
OpcodeTablePtr = &RegOp2MemOpTable2;
26312597
}
26322598

2633-
if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
2634-
return true;
2599+
if (OpcodeTablePtr) {
2600+
// Find the Opcode to fuse
2601+
DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
2602+
OpcodeTablePtr->find(Opc);
2603+
if (I != OpcodeTablePtr->end())
2604+
return true;
2605+
}
26352606
return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
26362607
}
26372608

llvm/lib/Target/X86/X86InstrInfo.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,20 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
544544
return N->hasOneUse();
545545
}]>;
546546

547+
// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
548+
def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
549+
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
550+
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
551+
552+
unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
553+
APInt Mask = APInt::getAllOnesValue(BitWidth);
554+
APInt KnownZero0, KnownOne0;
555+
CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
556+
APInt KnownZero1, KnownOne1;
557+
CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
558+
return (~KnownZero0 & ~KnownZero1) == 0;
559+
}]>;
560+
547561
//===----------------------------------------------------------------------===//
548562
// Instruction list.
549563
//

llvm/lib/Target/X86/X86MCInstLower.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
347347
}
348348

349349
// Handle a few special cases to eliminate operand modifiers.
350-
ReSimplify:
351350
switch (OutMI.getOpcode()) {
352351
case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
353352
lower_lea64_32mem(&OutMI, 1);
@@ -434,13 +433,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
434433
break;
435434
}
436435

437-
// These are pseudo-ops for OR to help with the OR->ADD transformation. We do
438-
// this with an ugly goto in case the resultant OR uses EAX and needs the
439-
// short form.
440-
case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
441-
case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
442-
case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
443-
444436
// The assembler backend wants to see branches in their small form and relax
445437
// them to their large form. The JIT can only handle the large form because
446438
// it does not do relaxation. For now, translate the large form to the

0 commit comments

Comments
 (0)