Skip to content

Commit bd0cf26

Browse files
committed
Change strategy to avoid converting to narrow add
I've reverted the part of the previous commit that introduced and used `revalidateInstruction`, replacing it with a more careful check in `processInstruction` of whether to transform `t2ADDri` → `tADDi8` in the first place. I've kept the index fixes in the existing `validateInstruction`. Also expanded the testing considerably, including making two test files to deal with the fact that some of these unemittable instructions aren't detected until a later phase of assembly, which won't be reached if there are any errors in the first phase.
1 parent 1af3d37 commit bd0cf26

File tree

3 files changed

+82
-76
lines changed

3 files changed

+82
-76
lines changed

llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp

Lines changed: 26 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -659,8 +659,6 @@ class ARMAsmParser : public MCTargetAsmParser {
659659

660660
bool validateInstruction(MCInst &Inst, const OperandVector &Ops,
661661
unsigned MnemonicOpsEndInd);
662-
bool revalidateInstruction(MCInst &Inst, const OperandVector &Ops,
663-
unsigned MnemonicOpsEndInd, unsigned OrigOpcode);
664662
bool processInstruction(MCInst &Inst, const OperandVector &Ops,
665663
unsigned MnemonicOpsEndInd, MCStreamer &Out);
666664
bool shouldOmitVectorPredicateOperand(StringRef Mnemonic,
@@ -8657,41 +8655,6 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
86578655
return false;
86588656
}
86598657

8660-
// After processInstruction has transformed an instruction being assembled into
8661-
// a different opcode, do any further validity checks that the new opcode
8662-
// depends on.
8663-
//
8664-
// `Inst` contains the final modified form of the instruction, but `Operands`
8665-
// contains the parsed operands from the _original_ instruction, because
8666-
// nothing has updated them (`processInstruction` received them as const).
8667-
// `OrigOpcode` contains the original value of `Inst.getOpcode()`, which should
8668-
// give enough context to know how to understand the original operands list.
8669-
bool ARMAsmParser::revalidateInstruction(MCInst &Inst,
8670-
const OperandVector &Operands,
8671-
unsigned MnemonicOpsEndInd,
8672-
unsigned OrigOpcode) {
8673-
const unsigned NewOpcode = Inst.getOpcode();
8674-
8675-
if (OrigOpcode == ARM::t2ADDri && NewOpcode == ARM::tADDi8) {
8676-
// t2ADDri is the Thumb 32-bit immediate add instruction, for example
8677-
// 'add[s] r0,r1,#0xff00'. If its immediate argument isn't a constant
8678-
// requiring shifting, then processInstruction can turn it into tADDi8, the
8679-
// simpler 16-bit Thumb immediate add (provided all the other conditions
8680-
// for that transformation are met). That makes it too late for
8681-
// validateInstruction to do this check, which it would have done if it had
8682-
// known from the start that the instruction was tADDi8.
8683-
int i = (Operands[MnemonicOpsEndInd + 1]->isImm()) ? MnemonicOpsEndInd + 1
8684-
: MnemonicOpsEndInd + 2;
8685-
MCParsedAsmOperand &Op = *Operands[i];
8686-
if (isARMMCExpr(Op) && !isThumbI8Relocation(Op))
8687-
return Error(Op.getStartLoc(),
8688-
"Immediate expression for Thumb adds requires :lower0_7:,"
8689-
" :lower8_15:, :upper0_7: or :upper8_15:");
8690-
}
8691-
8692-
return false;
8693-
}
8694-
86958658
static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) {
86968659
switch(Opc) {
86978660
default: llvm_unreachable("unexpected opcode!");
@@ -10748,14 +10711,25 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
1074810711
// the flags are compatible with the current IT status, use encoding T2
1074910712
// instead of T3. For compatibility with the system 'as'. Make sure the
1075010713
// wide encoding wasn't explicit.
10751-
if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
10752-
!isARMLowRegister(Inst.getOperand(0).getReg()) ||
10753-
(Inst.getOperand(2).isImm() &&
10754-
(unsigned)Inst.getOperand(2).getImm() > 255) ||
10755-
Inst.getOperand(5).getReg() !=
10756-
(inITBlock() ? ARM::NoRegister : ARM::CPSR) ||
10757-
HasWideQualifier)
10758-
break;
10714+
if (HasWideQualifier)
10715+
break; // source code has asked for the 32-bit instruction
10716+
if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg())
10717+
break; // tADDi8 can't take different input and output registers
10718+
if (!isARMLowRegister(Inst.getOperand(0).getReg()))
10719+
break; // high register that tADDi8 can't access
10720+
if (Inst.getOperand(5).getReg() !=
10721+
(inITBlock() ? ARM::NoRegister : ARM::CPSR))
10722+
break; // flag-modification would require overriding the IT state
10723+
if (Inst.getOperand(2).isImm()) {
10724+
if ((unsigned)Inst.getOperand(2).getImm() > 255)
10725+
break; // large immediate that tADDi8 can't contain
10726+
} else {
10727+
int i = (Operands[MnemonicOpsEndInd + 1]->isImm()) ? MnemonicOpsEndInd + 1
10728+
: MnemonicOpsEndInd + 2;
10729+
MCParsedAsmOperand &Op = *Operands[i];
10730+
if (isARMMCExpr(Op) && !isThumbI8Relocation(Op))
10731+
break; // a type of non-immediate that tADDi8 can't represent
10732+
}
1075910733
MCInst TmpInst;
1076010734
TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
1076110735
ARM::tADDi8 : ARM::tSUBi8);
@@ -11438,7 +11412,7 @@ bool ARMAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1143811412
unsigned MnemonicOpsEndInd = getMnemonicOpsEndInd(Operands);
1143911413

1144011414
switch (MatchResult) {
11441-
case Match_Success: {
11415+
case Match_Success:
1144211416
LLVM_DEBUG(dbgs() << "Parsed as: ";
1144311417
Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
1144411418
dbgs() << "\n");
@@ -11453,31 +11427,15 @@ bool ARMAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1145311427
return true;
1145411428
}
1145511429

11456-
// Some instructions need post-processing to, for example, tweak which
11457-
// encoding is selected. Loop on it while changes happen so the individual
11458-
// transformations can chain off each other. E.g.,
11459-
// tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
11460-
//
11461-
// This is written as a do-while inside an if, instead of the more obvious
11462-
// while loop, so that after postprocessing has completed the revised
11463-
// instruction can be revalidated, but (to save time) only if any changes
11464-
// had to be made at all.
11465-
unsigned OrigOpcode = Inst.getOpcode();
11466-
if (processInstruction(Inst, Operands, MnemonicOpsEndInd, Out)) {
11467-
do {
11430+
{
11431+
// Some instructions need post-processing to, for example, tweak which
11432+
// encoding is selected. Loop on it while changes happen so the
11433+
// individual transformations can chain off each other. E.g.,
11434+
// tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
11435+
while (processInstruction(Inst, Operands, MnemonicOpsEndInd, Out))
1146811436
LLVM_DEBUG(dbgs() << "Changed to: ";
1146911437
Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
1147011438
dbgs() << "\n");
11471-
} while (processInstruction(Inst, Operands, MnemonicOpsEndInd, Out));
11472-
11473-
MnemonicOpsEndInd = getMnemonicOpsEndInd(Operands);
11474-
if (revalidateInstruction(Inst, Operands, MnemonicOpsEndInd,
11475-
OrigOpcode)) {
11476-
// As above, advance IT/VPT positions if we're exiting early.
11477-
forwardITPosition();
11478-
forwardVPTPosition();
11479-
return true;
11480-
}
1148111439
}
1148211440

1148311441
// Only move forward at the very end so that everything in validate
@@ -11500,7 +11458,6 @@ bool ARMAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1150011458
Out.emitInstruction(Inst, getSTI());
1150111459
}
1150211460
return false;
11503-
}
1150411461
case Match_NearMisses:
1150511462
ReportNearMisses(NearMisses, IDLoc, Operands);
1150611463
return true;
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// RUN: not llvm-mc --triple thumbv7m -filetype=obj -o /dev/null %s 2>&1 | FileCheck %s
2+
3+
// This test checks reporting of errors of the form "you should have
4+
// used :lower16: in this immediate field", when the errors are
5+
// discovered at the object-file output stage by checking the set of
6+
// available relocations.
7+
//
8+
// For errors that are reported earlier, when initially reading the
9+
// instructions, see lower-upper-errors.s.
10+
11+
// CHECK: [[@LINE+1]]:1: error: unsupported relocation
12+
adds r0, r0, #foo
13+
14+
// CHECK: [[@LINE+1]]:1: error: unsupported relocation
15+
add r9, r0, #foo
16+
17+
// CHECK: [[@LINE+1]]:1: error: expected relocatable expression
18+
movs r11, :upper8_15:#foo
Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,51 @@
1-
// RUN: not llvm-mc --triple thumbv6m -show-encoding %s 2>&1 | FileCheck %s
2-
// RUN: not llvm-mc --triple thumbv7m -show-encoding %s 2>&1 | FileCheck %s --check-prefixes=CHECK,THUMB2
1+
// RUN: not llvm-mc --triple thumbv6m %s 2>&1 | FileCheck %s --check-prefixes=CHECK,THUMB1
2+
// RUN: not llvm-mc --triple thumbv7m %s 2>&1 | FileCheck %s --check-prefixes=CHECK,THUMB2
33

4-
// Check reporting of errors of the form "you should have used
5-
// :lower16: in this immediate field".
4+
// This test checks reporting of errors of the form "you should have
5+
// used :lower16: in this immediate field", during initial reading of
6+
// the source file.
7+
//
8+
// For errors that are reported at object-file output time, see
9+
// lower-upper-errors-2.s.
610

711
// CHECK: :[[@LINE+1]]:10: error: Immediate expression for Thumb movs requires :lower0_7:, :lower8_15:, :upper0_7: or :upper8_15:
812
movs r0, #foo
913

1014
// CHECK: :[[@LINE+1]]:10: error: Immediate expression for Thumb adds requires :lower0_7:, :lower8_15:, :upper0_7: or :upper8_15:
1115
adds r0, #foo
1216

13-
// CHECK: :[[@LINE+1]]:14: error: Immediate expression for Thumb adds requires :lower0_7:, :lower8_15:, :upper0_7: or :upper8_15:
14-
adds r0, r0, #foo
15-
1617
// THUMB2: :[[@LINE+1]]:10: error: immediate expression for mov requires :lower16: or :upper16
1718
movw r0, #foo
1819

1920
// THUMB2: :[[@LINE+1]]:10: error: immediate expression for mov requires :lower16: or :upper16
2021
movt r0, #foo
22+
23+
// With a Thumb2 wide add instruction available, this case isn't an error
24+
// while reading the source file. It only causes a problem when an object
25+
// file is output, and it turns out there's no suitable relocation to ask
26+
// for the value of an external symbol to be turned into a Thumb shifted
27+
// immediate field. And in this case the other errors in this source file
28+
// cause assembly to terminate before reaching the object-file output stage
29+
// (even if a test run had had -filetype=obj).
30+
31+
// THUMB1: :[[@LINE+2]]:14: error: Immediate expression for Thumb adds requires :lower0_7:, :lower8_15:, :upper0_7: or :upper8_15:
32+
// THUMB2-NOT: :[[@LINE+1]]:{{[0-9]+}}: error:
33+
adds r0, r0, #foo
34+
35+
// Similarly for this version, which _must_ be the wide encoding due
36+
// to the use of a high register and the lack of flag-setting.
37+
38+
// THUMB1: :[[@LINE+2]]:1: error: invalid instruction
39+
// THUMB2-NOT: :[[@LINE+1]]:{{[0-9]+}}: error:
40+
add r9, r0, #foo
41+
42+
// CHECK: :[[@LINE+1]]:10: error: Immediate expression for Thumb movs requires :lower0_7:, :lower8_15:, :upper0_7: or :upper8_15:
43+
movs r0, :lower16:#foo
44+
45+
// This is invalid in either architecture: in Thumb1 it can't use a
46+
// high register, and in Thumb2 it can't use :upper8_15:. But the
47+
// Thumb2 case won't cause an error until output.
48+
49+
// THUMB1: :[[@LINE+2]]:1: error: invalid instruction
50+
// THUMB2-NOT: :[[@LINE+1]]:{{[0-9]+}}: error:
51+
movs r11, :upper8_15:#foo

0 commit comments

Comments
 (0)