Skip to content

Commit d93f850

Browse files
authored
[TableGen] Extend OPC_ExtractField/OPC_CheckField start value widths. (llvm#79723)
Both OPC_ExtractField and OPC_CheckField are currently defined to take an unsigned 8-bit start value. On some architectures with long instruction words, this value can silently overflow, resulting in a bad decoder table. This patch changes each to take a ULE128B-encoded start value instead. Additionally, a range assertion is added for the 8-bit length to prominently notify a user in case that field ever overflows. This problem isn't currently exposed upstream since all in-tree targets use small instruction words (i.e., bitwidth <= 64 bits). It does show up in at least one downstream target with instructions > 64 bits long. Co-authored-by: Jason Eckhardt <[email protected]>
1 parent a372460 commit d93f850

File tree

3 files changed

+93
-21
lines changed

3 files changed

+93
-21
lines changed

llvm/include/llvm/MC/MCDecoderOps.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ namespace llvm {
1515
namespace MCD {
1616
// Disassembler state machine opcodes.
1717
enum DecoderOps {
18-
OPC_ExtractField = 1, // OPC_ExtractField(uint8_t Start, uint8_t Len)
18+
OPC_ExtractField = 1, // OPC_ExtractField(uleb128 Start, uint8_t Len)
1919
OPC_FilterValue, // OPC_FilterValue(uleb128 Val, uint16_t NumToSkip)
20-
OPC_CheckField, // OPC_CheckField(uint8_t Start, uint8_t Len,
20+
OPC_CheckField, // OPC_CheckField(uleb128 Start, uint8_t Len,
2121
// uleb128 Val, uint16_t NumToSkip)
2222
OPC_CheckPredicate, // OPC_CheckPredicate(uleb128 PIdx, uint16_t NumToSkip)
2323
OPC_Decode, // OPC_Decode(uleb128 Opcode, uleb128 DIdx)
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
2+
3+
// Test for OPC_ExtractField/OPC_CheckField with start bit > 255.
4+
// These large start values may arise for architectures with long instruction
5+
// words.
6+
7+
include "llvm/Target/Target.td"
8+
9+
def archInstrInfo : InstrInfo { }
10+
11+
def arch : Target {
12+
let InstructionSet = archInstrInfo;
13+
}
14+
15+
class TestInstruction : Instruction {
16+
let Size = 64;
17+
let OutOperandList = (outs);
18+
let InOperandList = (ins);
19+
field bits<512> Inst;
20+
field bits<512> SoftFail = 0;
21+
}
22+
23+
def InstA : TestInstruction {
24+
let Inst{509-502} = {0,0,0,0,?,?,?,?};
25+
let AsmString = "InstA";
26+
}
27+
28+
def InstB : TestInstruction {
29+
let Inst{509-502} = {0,0,0,0,0,0,?,?};
30+
let AsmString = "InstB";
31+
let DecoderMethod = "DecodeInstB";
32+
let hasCompleteDecoder = 0;
33+
}
34+
35+
36+
// CHECK: /* 0 */ MCD::OPC_ExtractField, 250, 3, 4, // Inst{509-506} ...
37+
// CHECK-NEXT: /* 4 */ MCD::OPC_FilterValue, 0, 19, 0, 0, // Skip to: 28
38+
// CHECK-NEXT: /* 9 */ MCD::OPC_CheckField, 248, 3, 2, 0, 7, 0, 0, // Skip to: 24
39+
// CHECK-NEXT: /* 17 */ MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, 0, // Opcode: InstB, skip to: 24
40+
// CHECK-NEXT: /* 24 */ MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: InstA
41+
// CHECK-NEXT: /* 28 */ MCD::OPC_Fail,
42+
43+
// CHECK: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
44+

llvm/utils/TableGen/DecoderEmitter.cpp

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -676,8 +676,13 @@ static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
676676
// Emit table entries to decode instructions given a segment or segments
677677
// of bits.
678678
void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
679+
assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!");
679680
TableInfo.Table.push_back(MCD::OPC_ExtractField);
680-
TableInfo.Table.push_back(StartBit);
681+
682+
SmallString<16> SBytes;
683+
raw_svector_ostream S(SBytes);
684+
encodeULEB128(StartBit, S);
685+
TableInfo.Table.insert(TableInfo.Table.end(), SBytes.begin(), SBytes.end());
681686
TableInfo.Table.push_back(NumBits);
682687

683688
// A new filter entry begins a new scope for fixup resolution.
@@ -786,10 +791,20 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
786791
PrintFatalError("invalid decode table opcode");
787792
case MCD::OPC_ExtractField: {
788793
++I;
789-
unsigned Start = *I++;
794+
OS.indent(Indentation) << "MCD::OPC_ExtractField, ";
795+
796+
// ULEB128 encoded start value.
797+
uint8_t Buffer[16], *P = Buffer;
798+
while ((*P++ = *I++) >= 128)
799+
assert((P - Buffer) <= (ptrdiff_t)sizeof(Buffer) &&
800+
"ULEB128 value too large!");
801+
unsigned Start = decodeULEB128(Buffer);
802+
for (P = Buffer; *P >= 128; ++P)
803+
OS << (unsigned)*P << ", ";
804+
OS << (unsigned)*P << ", ";
805+
790806
unsigned Len = *I++;
791-
OS.indent(Indentation) << "MCD::OPC_ExtractField, " << Start << ", "
792-
<< Len << ", // Inst{";
807+
OS << Len << ", // Inst{";
793808
if (Len > 1)
794809
OS << (Start + Len - 1) << "-";
795810
OS << Start << "} ...\n";
@@ -818,10 +833,14 @@ void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
818833
}
819834
case MCD::OPC_CheckField: {
820835
++I;
821-
unsigned Start = *I++;
836+
OS.indent(Indentation) << "MCD::OPC_CheckField, ";
837+
// ULEB128 encoded start value.
838+
for (; *I >= 128; ++I)
839+
OS << (unsigned)*I << ", ";
840+
OS << (unsigned)*I++ << ", ";
841+
// 8-bit length.
822842
unsigned Len = *I++;
823-
OS.indent(Indentation) << "MCD::OPC_CheckField, " << Start << ", "
824-
<< Len << ", ";// << Val << ", " << NumToSkip << ",\n";
843+
OS << Len << ", ";
825844
// ULEB128 encoded field value.
826845
for (; *I >= 128; ++I)
827846
OS << (unsigned)*I << ", ";
@@ -1416,15 +1435,19 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
14161435

14171436
// Check any additional encoding fields needed.
14181437
for (unsigned I = Size; I != 0; --I) {
1419-
unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1;
1438+
unsigned NumBits = EndBits[I - 1] - StartBits[I - 1] + 1;
1439+
assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!");
14201440
TableInfo.Table.push_back(MCD::OPC_CheckField);
1421-
TableInfo.Table.push_back(StartBits[I-1]);
1441+
uint8_t Buffer[16], *P;
1442+
encodeULEB128(StartBits[I - 1], Buffer);
1443+
for (P = Buffer; *P >= 128; ++P)
1444+
TableInfo.Table.push_back(*P);
1445+
TableInfo.Table.push_back(*P);
14221446
TableInfo.Table.push_back(NumBits);
1423-
uint8_t Buffer[16], *p;
1424-
encodeULEB128(FieldVals[I-1], Buffer);
1425-
for (p = Buffer; *p >= 128 ; ++p)
1426-
TableInfo.Table.push_back(*p);
1427-
TableInfo.Table.push_back(*p);
1447+
encodeULEB128(FieldVals[I - 1], Buffer);
1448+
for (P = Buffer; *P >= 128; ++P)
1449+
TableInfo.Table.push_back(*P);
1450+
TableInfo.Table.push_back(*P);
14281451
// Push location for NumToSkip backpatching.
14291452
TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
14301453
// The fixup is always 24-bits, so go ahead and allocate the space
@@ -2227,9 +2250,11 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS,
22272250
<< " errs() << Loc << \": Unexpected decode table opcode!\\n\";\n"
22282251
<< " return MCDisassembler::Fail;\n"
22292252
<< " case MCD::OPC_ExtractField: {\n"
2230-
<< " unsigned Start = *++Ptr;\n"
2231-
<< " unsigned Len = *++Ptr;\n"
2232-
<< " ++Ptr;\n";
2253+
<< " // Decode the start value.\n"
2254+
<< " unsigned DecodedLen;\n"
2255+
<< " unsigned Start = decodeULEB128(++Ptr, &DecodedLen);\n"
2256+
<< " Ptr += DecodedLen;\n"
2257+
<< " unsigned Len = *Ptr++;\n";
22332258
if (IsVarLenInst)
22342259
OS << " makeUp(insn, Start + Len);\n";
22352260
OS << " CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
@@ -2261,8 +2286,11 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS,
22612286
<< " break;\n"
22622287
<< " }\n"
22632288
<< " case MCD::OPC_CheckField: {\n"
2264-
<< " unsigned Start = *++Ptr;\n"
2265-
<< " unsigned Len = *++Ptr;\n";
2289+
<< " // Decode the start value.\n"
2290+
<< " unsigned Len;\n"
2291+
<< " unsigned Start = decodeULEB128(++Ptr, &Len);\n"
2292+
<< " Ptr += Len;\n"
2293+
<< " Len = *Ptr;\n";
22662294
if (IsVarLenInst)
22672295
OS << " makeUp(insn, Start + Len);\n";
22682296
OS << " uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);\n"

0 commit comments

Comments
 (0)