Skip to content

Commit 1af3d37

Browse files
committed
[MC][ARM] Fix crash when assembling Thumb 'movs r0,#foo'.
If the assembler sees this instruction, understanding 'foo' to be an external symbol, there's no relocation it can write that will put the whole value of 'foo' into the 8-bit immediate field of the 16-bit Thumb add instruction. So it should report an error message pointing at the source line, and in LLVM 18, it did exactly that. But now the error is not reported, due to an indexing error in the operand list in `validateInstruction`, and instead the code continues to attempt assembly, ending up aborting at the `llvm_unreachable` at the end of `getHiLoImmOpValue`. In this commit I've fixed the index in the `ARM::tMOVi8` case of `validateInstruction`, and also the one for `tADDi8` which must cope with either the 2- or 3-operand form in the input assembly source. But also, while writing the test, I found that if you assemble for Armv7-M instead of Armv6-M, the instruction has opcode `t2ADDri` when it goes through `validateInstruction`, and only turns into `tMOVi8` later in `processInstruction`. Then it's too late for `validateInstruction` to report that error. So I've introduced a new `revalidateInstruction` which is run _after_ modifying the instruction, and repeated the same check there.
1 parent a256e89 commit 1af3d37

File tree

2 files changed

+85
-9
lines changed

2 files changed

+85
-9
lines changed

llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp

Lines changed: 65 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,8 @@ class ARMAsmParser : public MCTargetAsmParser {
659659

660660
bool validateInstruction(MCInst &Inst, const OperandVector &Ops,
661661
unsigned MnemonicOpsEndInd);
662+
bool revalidateInstruction(MCInst &Inst, const OperandVector &Ops,
663+
unsigned MnemonicOpsEndInd, unsigned OrigOpcode);
662664
bool processInstruction(MCInst &Inst, const OperandVector &Ops,
663665
unsigned MnemonicOpsEndInd, MCStreamer &Out);
664666
bool shouldOmitVectorPredicateOperand(StringRef Mnemonic,
@@ -8294,15 +8296,17 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
82948296
break;
82958297
}
82968298
case ARM::tADDi8: {
8297-
MCParsedAsmOperand &Op = *Operands[MnemonicOpsEndInd + 1];
8299+
int i = (Operands[MnemonicOpsEndInd + 1]->isImm()) ? MnemonicOpsEndInd + 1
8300+
: MnemonicOpsEndInd + 2;
8301+
MCParsedAsmOperand &Op = *Operands[i];
82988302
if (isARMMCExpr(Op) && !isThumbI8Relocation(Op))
82998303
return Error(Op.getStartLoc(),
83008304
"Immediate expression for Thumb adds requires :lower0_7:,"
83018305
" :lower8_15:, :upper0_7: or :upper8_15:");
83028306
break;
83038307
}
83048308
case ARM::tMOVi8: {
8305-
MCParsedAsmOperand &Op = *Operands[MnemonicOpsEndInd];
8309+
MCParsedAsmOperand &Op = *Operands[MnemonicOpsEndInd + 1];
83068310
if (isARMMCExpr(Op) && !isThumbI8Relocation(Op))
83078311
return Error(Op.getStartLoc(),
83088312
"Immediate expression for Thumb movs requires :lower0_7:,"
@@ -8653,6 +8657,41 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
86538657
return false;
86548658
}
86558659

8660+
// After processInstruction has transformed an instruction being assembled into
8661+
// a different opcode, do any further validity checks that the new opcode
8662+
// depends on.
8663+
//
8664+
// `Inst` contains the final modified form of the instruction, but `Operands`
8665+
// contains the parsed operands from the _original_ instruction, because
8666+
// nothing has updated them (`processInstruction` received them as const).
8667+
// `OrigOpcode` contains the original value of `Inst.getOpcode()`, which should
8668+
// give enough context to know how to understand the original operands list.
8669+
bool ARMAsmParser::revalidateInstruction(MCInst &Inst,
8670+
const OperandVector &Operands,
8671+
unsigned MnemonicOpsEndInd,
8672+
unsigned OrigOpcode) {
8673+
const unsigned NewOpcode = Inst.getOpcode();
8674+
8675+
if (OrigOpcode == ARM::t2ADDri && NewOpcode == ARM::tADDi8) {
8676+
// t2ADDri is the Thumb 32-bit immediate add instruction, for example
8677+
// 'add[s] r0,r1,#0xff00'. If its immediate argument isn't a constant
8678+
// requiring shifting, then processInstruction can turn it into tADDi8, the
8679+
// simpler 16-bit Thumb immediate add (provided all the other conditions
8680+
// for that transformation are met). That makes it too late for
8681+
// validateInstruction to do this check, which it would have done if it had
8682+
// known from the start that the instruction was tADDi8.
8683+
int i = (Operands[MnemonicOpsEndInd + 1]->isImm()) ? MnemonicOpsEndInd + 1
8684+
: MnemonicOpsEndInd + 2;
8685+
MCParsedAsmOperand &Op = *Operands[i];
8686+
if (isARMMCExpr(Op) && !isThumbI8Relocation(Op))
8687+
return Error(Op.getStartLoc(),
8688+
"Immediate expression for Thumb adds requires :lower0_7:,"
8689+
" :lower8_15:, :upper0_7: or :upper8_15:");
8690+
}
8691+
8692+
return false;
8693+
}
8694+
86568695
static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) {
86578696
switch(Opc) {
86588697
default: llvm_unreachable("unexpected opcode!");
@@ -11399,7 +11438,7 @@ bool ARMAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1139911438
unsigned MnemonicOpsEndInd = getMnemonicOpsEndInd(Operands);
1140011439

1140111440
switch (MatchResult) {
11402-
case Match_Success:
11441+
case Match_Success: {
1140311442
LLVM_DEBUG(dbgs() << "Parsed as: ";
1140411443
Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
1140511444
dbgs() << "\n");
@@ -11414,15 +11453,31 @@ bool ARMAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1141411453
return true;
1141511454
}
1141611455

11417-
{
11418-
// Some instructions need post-processing to, for example, tweak which
11419-
// encoding is selected. Loop on it while changes happen so the
11420-
// individual transformations can chain off each other. E.g.,
11421-
// tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
11422-
while (processInstruction(Inst, Operands, MnemonicOpsEndInd, Out))
11456+
// Some instructions need post-processing to, for example, tweak which
11457+
// encoding is selected. Loop on it while changes happen so the individual
11458+
// transformations can chain off each other. E.g.,
11459+
// tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
11460+
//
11461+
// This is written as a do-while inside an if, instead of the more obvious
11462+
// while loop, so that after postprocessing has completed the revised
11463+
// instruction can be revalidated, but (to save time) only if any changes
11464+
// had to be made at all.
11465+
unsigned OrigOpcode = Inst.getOpcode();
11466+
if (processInstruction(Inst, Operands, MnemonicOpsEndInd, Out)) {
11467+
do {
1142311468
LLVM_DEBUG(dbgs() << "Changed to: ";
1142411469
Inst.dump_pretty(dbgs(), MII.getName(Inst.getOpcode()));
1142511470
dbgs() << "\n");
11471+
} while (processInstruction(Inst, Operands, MnemonicOpsEndInd, Out));
11472+
11473+
MnemonicOpsEndInd = getMnemonicOpsEndInd(Operands);
11474+
if (revalidateInstruction(Inst, Operands, MnemonicOpsEndInd,
11475+
OrigOpcode)) {
11476+
// As above, advance IT/VPT positions if we're exiting early.
11477+
forwardITPosition();
11478+
forwardVPTPosition();
11479+
return true;
11480+
}
1142611481
}
1142711482

1142811483
// Only move forward at the very end so that everything in validate
@@ -11445,6 +11500,7 @@ bool ARMAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1144511500
Out.emitInstruction(Inst, getSTI());
1144611501
}
1144711502
return false;
11503+
}
1144811504
case Match_NearMisses:
1144911505
ReportNearMisses(NearMisses, IDLoc, Operands);
1145011506
return true;
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: not llvm-mc --triple thumbv6m -show-encoding %s 2>&1 | FileCheck %s
2+
// RUN: not llvm-mc --triple thumbv7m -show-encoding %s 2>&1 | FileCheck %s --check-prefixes=CHECK,THUMB2
3+
4+
// Check reporting of errors of the form "you should have used
5+
// :lower16: in this immediate field".
6+
7+
// CHECK: :[[@LINE+1]]:10: error: Immediate expression for Thumb movs requires :lower0_7:, :lower8_15:, :upper0_7: or :upper8_15:
8+
movs r0, #foo
9+
10+
// CHECK: :[[@LINE+1]]:10: error: Immediate expression for Thumb adds requires :lower0_7:, :lower8_15:, :upper0_7: or :upper8_15:
11+
adds r0, #foo
12+
13+
// CHECK: :[[@LINE+1]]:14: error: Immediate expression for Thumb adds requires :lower0_7:, :lower8_15:, :upper0_7: or :upper8_15:
14+
adds r0, r0, #foo
15+
16+
// THUMB2: :[[@LINE+1]]:10: error: immediate expression for mov requires :lower16: or :upper16
17+
movw r0, #foo
18+
19+
// THUMB2: :[[@LINE+1]]:10: error: immediate expression for mov requires :lower16: or :upper16
20+
movt r0, #foo

0 commit comments

Comments
 (0)