Commit dd77477 (1 parent 626656a)

reapply: Use the new TB_NOT_REVERSABLE flag instead of special
reapply: reimplement the second half of the or/add optimization. We should now with no changes. Turns out that one missing "Defs = [EFLAGS]" can upset things a bit.

llvm-svn: 116040
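The or/add optimization being reapplied rests on a simple bit-level fact: when the two operands of an "or" have no set bits in common, the "or" produces exactly the same result as an "add", so the backend is free to pick whichever form encodes better (for example, folding the operation into an LEA). Below is a minimal standalone sketch of that equivalence in C++; it is not LLVM's actual or_is_add predicate, and the known-zero mask is an assumption used only for illustration.

#include <cassert>
#include <cstdint>

// Returns true when OR'ing Imm into a value whose bits in KnownZeroMask are
// known to be zero behaves exactly like an ADD: with no overlapping set bits
// there are no carries, so (x | imm) == (x + imm).
static bool orActsLikeAdd(uint64_t KnownZeroMask, uint64_t Imm) {
  return (Imm & ~KnownZeroMask) == 0;
}

int main() {
  // x = (v << 5): the low five bits of x are known zero, so OR'ing in 3 is
  // equivalent to adding 3 (the pattern the 3addr-or.ll tests below exercise).
  assert(orActsLikeAdd(/*KnownZeroMask=*/0x1F, /*Imm=*/3));

  // If bit 1 of x may be set, the equivalence no longer holds in general.
  assert(!orActsLikeAdd(/*KnownZeroMask=*/0x1D, /*Imm=*/3));
  return 0;
}

The ADD*ri*_DB pseudo instructions added below keep that information visible after instruction selection, so the other pieces touched by this commit (the folding tables, convertToThreeAddress, and X86MCInstLower) can still treat the value as an OR when that is the better choice.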

4 files changed (+113, -40 lines)
llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 27 additions & 14 deletions
@@ -1036,21 +1036,34 @@ def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
 def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
                    "", // orq/addq REG, REG
                    [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
+
+
+def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                   "", // orw/addw REG, imm
+                   [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
+def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+                   "", // orl/addl REG, imm
+                   [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
+def ADD64ri32_DB : I<0, Pseudo,
+                     (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+                     "", // orq/addq REG, imm
+                     [(set GR64:$dst, (or_is_add GR64:$src1,
+                                                 i64immSExt32:$src2))]>;
+
+def ADD16ri8_DB : I<0, Pseudo,
+                    (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+                    "", // orw/addw REG, imm8
+                    [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
+def ADD32ri8_DB : I<0, Pseudo,
+                    (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+                    "", // orl/addl REG, imm8
+                    [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
+def ADD64ri8_DB : I<0, Pseudo,
+                    (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+                    "", // orq/addq REG, imm8
+                    [(set GR64:$dst, (or_is_add GR64:$src1,
+                                                i64immSExt8:$src2))]>;
 }
-
-def : Pat<(or_is_add GR16:$src1, imm:$src2),
-          (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR32:$src1, imm:$src2),
-          (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
-          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
-          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
-          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
-          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
 } // AddedComplexity

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 41 additions & 21 deletions
@@ -68,14 +68,20 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::ADC64rr,     X86::ADC64mr },
     { X86::ADD16ri,     X86::ADD16mi },
     { X86::ADD16ri8,    X86::ADD16mi8 },
+    { X86::ADD16ri_DB,  X86::ADD16mi  | TB_NOT_REVERSABLE },
+    { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
     { X86::ADD16rr,     X86::ADD16mr },
     { X86::ADD16rr_DB,  X86::ADD16mr | TB_NOT_REVERSABLE },
     { X86::ADD32ri,     X86::ADD32mi },
     { X86::ADD32ri8,    X86::ADD32mi8 },
+    { X86::ADD32ri_DB,  X86::ADD32mi  | TB_NOT_REVERSABLE },
+    { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
     { X86::ADD32rr,     X86::ADD32mr },
     { X86::ADD32rr_DB,  X86::ADD32mr | TB_NOT_REVERSABLE },
     { X86::ADD64ri32,   X86::ADD64mi32 },
     { X86::ADD64ri8,    X86::ADD64mi8 },
+    { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE },
+    { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
     { X86::ADD64rr,     X86::ADD64mr },
     { X86::ADD64rr_DB,  X86::ADD64mr | TB_NOT_REVERSABLE },
     { X86::ADD8ri,      X86::ADD8mi },
@@ -263,8 +269,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::DIV64r,      X86::DIV64m, 1, 0 },
     { X86::DIV8r,       X86::DIV8m, 1, 0 },
     { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
-    { X86::FsMOVAPDrr,  X86::MOVSDmr, 0, 0 },
-    { X86::FsMOVAPSrr,  X86::MOVSSmr, 0, 0 },
+    { X86::FsMOVAPDrr,  X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 },
+    { X86::FsMOVAPSrr,  X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 },
     { X86::IDIV16r,     X86::IDIV16m, 1, 0 },
     { X86::IDIV32r,     X86::IDIV32m, 1, 0 },
     { X86::IDIV64r,     X86::IDIV64m, 1, 0 },
@@ -323,18 +329,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
-    unsigned RegOp = OpTbl0[i][0];
-    unsigned MemOp = OpTbl0[i][1];
-    unsigned Align = OpTbl0[i][3];
-    assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
-    RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp,Align);
+    unsigned RegOp = OpTbl0[i][0];
+    unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS;
     unsigned FoldedLoad = OpTbl0[i][2];
+    unsigned Align = OpTbl0[i][3];
+    assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
+    RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this is not a reversable operation (because there is a many->one)
+    // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
     // Index 0, folded load or store.
     unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
-    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) {
-      assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
-      MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
-    }
+    assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
 
   static const unsigned OpTbl1[][3] = {
@@ -352,8 +362,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm, 0 },
     { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm, 0 },
     { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm, 0 },
-    { X86::FsMOVAPDrr,      X86::MOVSDrm, 0 },
-    { X86::FsMOVAPSrr,      X86::MOVSSrm, 0 },
+    { X86::FsMOVAPDrr,      X86::MOVSDrm | TB_NOT_REVERSABLE , 0 },
+    { X86::FsMOVAPSrr,      X86::MOVSSrm | TB_NOT_REVERSABLE , 0 },
     { X86::IMUL16rri,       X86::IMUL16rmi, 0 },
     { X86::IMUL16rri8,      X86::IMUL16rmi8, 0 },
    { X86::IMUL32rri,       X86::IMUL32rmi, 0 },
@@ -449,17 +459,20 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
 
   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
     unsigned RegOp = OpTbl1[i][0];
-    unsigned MemOp = OpTbl1[i][1];
+    unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
     unsigned Align = OpTbl1[i][2];
     assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
-    RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp,Align);
+    RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this is not a reversable operation (because there is a many->one)
+    // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
+      continue;
 
     // Index 1, folded load
     unsigned AuxInfo = 1 | (1 << 4);
-    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) {
-      assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
-      MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
-    }
+    assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
 
   static const unsigned OpTbl2[][3] = {
@@ -671,7 +684,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
     RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
 
-
     // If this is not a reversable operation (because there is a many->one)
     // mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
     if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
@@ -1154,6 +1166,8 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
       break;
     case X86::ADD16ri:
     case X86::ADD16ri8:
+    case X86::ADD16ri_DB:
+    case X86::ADD16ri8_DB:
       addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
       break;
     case X86::ADD16rr:
@@ -1418,14 +1432,18 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   }
   case X86::ADD64ri32:
   case X86::ADD64ri8:
+  case X86::ADD64ri32_DB:
+  case X86::ADD64ri8_DB:
     assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
     NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
                          .addReg(Dest, RegState::Define |
                                  getDeadRegState(isDead)),
                          Src, isKill, MI->getOperand(2).getImm());
    break;
  case X86::ADD32ri:
-  case X86::ADD32ri8: {
+  case X86::ADD32ri8:
+  case X86::ADD32ri_DB:
+  case X86::ADD32ri8_DB: {
    assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
    unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
    NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
@@ -1436,6 +1454,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   }
   case X86::ADD16ri:
   case X86::ADD16ri8:
+  case X86::ADD16ri_DB:
+  case X86::ADD16ri8_DB:
     if (DisableLEA16)
       return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
     assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
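The comments in the table-building loops above describe the new flag mechanism: the memory-form opcode is stored with TB_NOT_REVERSABLE or'd into it, the flag bits are masked off with ~TB_FLAGS before the opcode is used, and flagged entries are skipped when building the reverse (memory to register) map, because several register forms now fold to the same memory opcode. The following is a compilable C++ sketch of that scheme with made-up opcode values and an assumed bit position for the flag; the real definitions live in X86InstrInfo.

#include <cassert>
#include <map>

constexpr unsigned TB_NOT_REVERSABLE = 1U << 31;  // assumed bit position, for illustration only
constexpr unsigned TB_FLAGS          = TB_NOT_REVERSABLE;

using OpcodeMap = std::map<unsigned, unsigned>;

// Build the register->memory folding map and, where legal, its inverse.
inline void buildFoldTables(const unsigned (*Table)[2], unsigned NumEntries,
                            OpcodeMap &RegToMem, OpcodeMap &MemToReg) {
  for (unsigned i = 0; i != NumEntries; ++i) {
    unsigned RegOp = Table[i][0];
    unsigned MemOp = Table[i][1] & ~TB_FLAGS;   // strip the flag bits before use
    RegToMem[RegOp] = MemOp;                    // the forward direction is always safe

    // A many->one mapping (e.g. both ADD32ri and ADD32ri_DB folding to
    // ADD32mi) cannot be inverted uniquely, so skip the reverse entry.
    if (Table[i][1] & TB_NOT_REVERSABLE)
      continue;
    assert(!MemToReg.count(MemOp) && "Duplicated entries?");
    MemToReg[MemOp] = RegOp;
  }
}

int main() {
  const unsigned Tbl[][2] = {
    { /*ADD32ri*/    1, /*ADD32mi*/ 10 },
    { /*ADD32ri_DB*/ 2, /*ADD32mi*/ 10 | TB_NOT_REVERSABLE },
  };
  OpcodeMap R2M, M2R;
  buildFoldTables(Tbl, 2, R2M, M2R);
  assert(R2M.size() == 2);                  // both register forms fold forward
  assert(M2R.size() == 1 && M2R[10] == 1);  // only the reversable entry is inverted
  return 0;
}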

llvm/lib/Target/X86/X86MCInstLower.cpp

Lines changed: 9 additions & 3 deletions
@@ -437,9 +437,15 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
   // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
   // this with an ugly goto in case the resultant OR uses EAX and needs the
   // short form.
-  case X86::ADD16rr_DB:   OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
-  case X86::ADD32rr_DB:   OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
-  case X86::ADD64rr_DB:   OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
+  case X86::ADD16rr_DB:   OutMI.setOpcode(X86::OR16rr);   goto ReSimplify;
+  case X86::ADD32rr_DB:   OutMI.setOpcode(X86::OR32rr);   goto ReSimplify;
+  case X86::ADD64rr_DB:   OutMI.setOpcode(X86::OR64rr);   goto ReSimplify;
+  case X86::ADD16ri_DB:   OutMI.setOpcode(X86::OR16ri);   goto ReSimplify;
+  case X86::ADD32ri_DB:   OutMI.setOpcode(X86::OR32ri);   goto ReSimplify;
+  case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
+  case X86::ADD16ri8_DB:  OutMI.setOpcode(X86::OR16ri8);  goto ReSimplify;
+  case X86::ADD32ri8_DB:  OutMI.setOpcode(X86::OR32ri8);  goto ReSimplify;
+  case X86::ADD64ri8_DB:  OutMI.setOpcode(X86::OR64ri8);  goto ReSimplify;
 
   // The assembler backend wants to see branches in their small form and relax
   // them to their large form. The JIT can only handle the large form because
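The ReSimplify goto that the comment above calls ugly exists so that, once a pseudo has been rewritten to its real OR opcode, the instruction passes through the same switch again and can still be shrunk to the short accumulator form when the destination is EAX/AX. A toy sketch of that control flow follows; the opcode names and helper are invented for illustration and are not the real lowering code.

#include <cstdio>

enum Opcode { ADD32ri_DB_Pseudo, OR32ri, OR32i32_EAXShortForm };

// Assumed helper for illustration only: does the destination allow the short form?
static bool destIsEAX(unsigned DestReg) { return DestReg == 0; }

static Opcode lowerOpcode(Opcode Op, unsigned DestReg) {
ReSimplify:
  switch (Op) {
  case ADD32ri_DB_Pseudo:
    Op = OR32ri;                    // the pseudo was really an OR; restore it
    goto ReSimplify;                // re-run the OR-specific simplification below
  case OR32ri:
    if (destIsEAX(DestReg))
      Op = OR32i32_EAXShortForm;    // pick the smaller encoding for EAX
    return Op;
  default:
    return Op;
  }
}

int main() {
  // The pseudo reaches the short EAX form via the second pass through the switch.
  std::printf("%d\n", lowerOpcode(ADD32ri_DB_Pseudo, /*DestReg=*/0));
  return 0;
}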

llvm/test/CodeGen/X86/3addr-or.ll

Lines changed: 36 additions & 2 deletions
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 ; rdar://7527734
 
-define i32 @test(i32 %x) nounwind readnone ssp {
+define i32 @test1(i32 %x) nounwind readnone ssp {
 entry:
-; CHECK: test:
+; CHECK: test1:
 ; CHECK: leal 3(%rdi), %eax
   %0 = shl i32 %x, 5                              ; <i32> [#uses=1]
   %1 = or i32 %0, 3                               ; <i32> [#uses=1]
@@ -25,3 +25,37 @@ define i64 @test2(i8 %A, i8 %B) nounwind {
   %H = or i64 %G, %E                              ; <i64> [#uses=1]
   ret i64 %H
 }
+
+;; Test that OR is only emitted as LEA, not as ADD.
+
+define void @test3(i32 %x, i32* %P) nounwind readnone ssp {
+entry:
+; No reason to emit an add here, should be an or.
+; CHECK: test3:
+; CHECK: orl $3, %edi
+  %0 = shl i32 %x, 5
+  %1 = or i32 %0, 3
+  store i32 %1, i32* %P
+  ret void
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+  %and = and i32 %a, 6
+  %and2 = and i32 %b, 16
+  %or = or i32 %and2, %and
+  ret i32 %or
+; CHECK: test4:
+; CHECK: leal (%rsi,%rdi), %eax
+}
+
+define void @test5(i32 %a, i32 %b, i32* nocapture %P) nounwind ssp {
+entry:
+  %and = and i32 %a, 6
+  %and2 = and i32 %b, 16
+  %or = or i32 %and2, %and
+  store i32 %or, i32* %P, align 4
+  ret void
+; CHECK: test5:
+; CHECK: orl
+}
