Skip to content

Commit 0d6ca2f

Browse files
authored
[TableGen][DecoderEmitter] Fix decoder reading bytes past instruction (#154916)
See the added test. Before this change the decoder would first read the second byte, despite the fact that there are 1-byte instructions that could match: ``` /* 0 */ MCD::OPC_ExtractField, 8, 8, // Inst{15-8} ... /* 3 */ MCD::OPC_FilterValue, 0, 4, 0, // Skip to: 11 /* 7 */ MCD::OPC_Decode, 186, 2, 0, // Opcode: I16_0, DecodeIdx: 0 /* 11 */ MCD::OPC_FilterValue, 1, 4, 0, // Skip to: 19 /* 15 */ MCD::OPC_Decode, 187, 2, 0, // Opcode: I16_1, DecodeIdx: 0 /* 19 */ MCD::OPC_FilterValue, 2, 4, 0, // Skip to: 27 /* 23 */ MCD::OPC_Decode, 188, 2, 0, // Opcode: I16_2, DecodeIdx: 0 /* 27 */ MCD::OPC_ExtractField, 0, 1, // Inst{0} ... /* 30 */ MCD::OPC_FilterValue, 0, 4, 0, // Skip to: 38 /* 34 */ MCD::OPC_Decode, 189, 2, 1, // Opcode: I8_0, DecodeIdx: 1 /* 38 */ MCD::OPC_FilterValueOrFail, 1, /* 40 */ MCD::OPC_Decode, 190, 2, 1, // Opcode: I8_1, DecodeIdx: 1 /* 44 */ MCD::OPC_Fail, ``` There are no changes in the generated files. The only in-tree target that uses variable length decoder is M68k, which is free of decoding conflicts that could result in the decoder doing OOB access. This also fixes misaligned "Decoding Conflict" dump, prettified example output is shown in the second test.
1 parent 67740d3 commit 0d6ca2f

File tree

3 files changed

+153
-34
lines changed

3 files changed

+153
-34
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// RUN: llvm-tblgen -gen-disassembler -I %p/../../../include %s | FileCheck %s
2+
3+
include "llvm/Target/Target.td"
4+
5+
class I : Instruction {
6+
let InOperandList = (ins i32imm:$op);
7+
let OutOperandList = (outs);
8+
}
9+
10+
// Check that we don't try to read the second byte without ruling out
11+
// 1-byte encodings first. This should actually be a decoding conflict,
12+
// but DecoderEmitter heuristics decide that I8_0 and I8_1 are more specific
13+
// than the rest and give them priority.
14+
15+
// _______0 I8_0
16+
// _______1 I8_1
17+
// 00000000 ________ I16_0
18+
// 00000001 ________ I16_1
19+
// 00000010 ________ I16_2
20+
21+
// CHECK: MCD::OPC_ExtractField, 0, 1, // Inst{0} ...
22+
// CHECK-NEXT: MCD::OPC_FilterValue, 0, 4, 0, // Skip to: 11
23+
// CHECK-NEXT: MCD::OPC_Decode, {{[0-9]+}}, 2, 0, // Opcode: I8_0, DecodeIdx: 0
24+
// CHECK-NEXT: MCD::OPC_FilterValue, 1, 4, 0, // Skip to: 19
25+
// CHECK-NEXT: MCD::OPC_Decode, {{[0-9]+}}, 2, 0, // Opcode: I8_1, DecodeIdx: 0
26+
// CHECK-NEXT: MCD::OPC_ExtractField, 8, 8, // Inst{15-8} ...
27+
// CHECK-NEXT: MCD::OPC_FilterValue, 0, 4, 0, // Skip to: 30
28+
// CHECK-NEXT: MCD::OPC_Decode, {{[0-9]+}}, 2, 1, // Opcode: I16_0, DecodeIdx: 1
29+
// CHECK-NEXT: MCD::OPC_FilterValue, 1, 4, 0, // Skip to: 38
30+
// CHECK-NEXT: MCD::OPC_Decode, {{[0-9]+}}, 2, 1, // Opcode: I16_1, DecodeIdx: 1
31+
// CHECK-NEXT: MCD::OPC_FilterValueOrFail, 2,
32+
// CHECK-NEXT: MCD::OPC_Decode, {{[0-9]+}}, 2, 1, // Opcode: I16_2, DecodeIdx: 1
33+
34+
def I8_0 : I { dag Inst = (descend (operand "$op", 7), 0b0); }
35+
def I8_1 : I { dag Inst = (descend (operand "$op", 7), 0b1); }
36+
def I16_0 : I { dag Inst = (descend 0b00000000, (operand "$op", 8)); }
37+
def I16_1 : I { dag Inst = (descend 0b00000001, (operand "$op", 8)); }
38+
def I16_2 : I { dag Inst = (descend 0b00000010, (operand "$op", 8)); }
39+
40+
def II : InstrInfo;
41+
42+
def MyTarget : Target {
43+
let InstructionSet = II;
44+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// RUN: not llvm-tblgen -gen-disassembler -I %p/../../../include %s -o /dev/null 2>&1 \
2+
// RUN: | FileCheck --strict-whitespace %s
3+
4+
include "llvm/Target/Target.td"
5+
6+
class I : Instruction {
7+
let InOperandList = (ins i32imm:$op, i32imm:$op2, i32imm:$op3);
8+
let OutOperandList = (outs);
9+
}
10+
11+
// Check pretty printing of decoding conflicts.
12+
13+
// CHECK: {{^}}Decoding Conflict:
14+
// CHECK-NEXT: {{^}} ........
15+
// CHECK-NEXT: {{^}} ..............11
16+
// CHECK-NEXT: {{^}} .......0......11
17+
// CHECK-NEXT: {{^}} _______0______11 I16_0
18+
// CHECK-NEXT: {{^}} _______0______11 I16_1
19+
// CHECK-NEXT: {{^}} _______0_______0______11 I24_0
20+
21+
def I8_0 : I { dag Inst = (descend (operand "$op", 6), 0b00); }
22+
def I8_1 : I { dag Inst = (descend (operand "$op", 6), 0b01); }
23+
def I16_0 : I { dag Inst = (descend (operand "$op2", 7), 0b0,
24+
(operand "$op", 6), 0b11); }
25+
def I16_1 : I { dag Inst = (descend (operand "$op2", 7), 0b0,
26+
(operand "$op", 6), 0b11); }
27+
def I24_0 : I { dag Inst = (descend (operand "$op3", 7), 0b0,
28+
(operand "$op2", 7), 0b0,
29+
(operand "$op", 6), 0b11); }
30+
31+
def II : InstrInfo;
32+
33+
def MyTarget : Target {
34+
let InstructionSet = II;
35+
}

llvm/utils/TableGen/DecoderEmitter.cpp

Lines changed: 74 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,19 @@ class InstructionEncoding {
218218
void parseFixedLenOperands(const BitsInit &Bits);
219219
};
220220

221+
/// Sorting predicate to sort encoding IDs by encoding width.
222+
class LessEncodingIDByWidth {
223+
ArrayRef<InstructionEncoding> Encodings;
224+
225+
public:
226+
explicit LessEncodingIDByWidth(ArrayRef<InstructionEncoding> Encodings)
227+
: Encodings(Encodings) {}
228+
229+
bool operator()(unsigned ID1, unsigned ID2) const {
230+
return Encodings[ID1].getBitWidth() < Encodings[ID2].getBitWidth();
231+
}
232+
};
233+
221234
typedef std::vector<uint32_t> FixupList;
222235
typedef std::vector<FixupList> FixupScopeList;
223236
typedef SmallSetVector<CachedHashString, 16> PredicateSet;
@@ -475,8 +488,9 @@ class FilterChooser {
475488
// Vector of encodings to choose our filter.
476489
ArrayRef<InstructionEncoding> Encodings;
477490

478-
// Vector of encoding IDs for this filter chooser to work on.
479-
ArrayRef<unsigned> EncodingIDs;
491+
/// Encoding IDs for this filter chooser to work on.
492+
/// Sorted by non-decreasing encoding width.
493+
SmallVector<unsigned, 0> EncodingIDs;
480494

481495
// The selected filter, if any.
482496
std::unique_ptr<Filter> BestFilter;
@@ -488,8 +502,9 @@ class FilterChooser {
488502
// Links to the FilterChooser above us in the decoding tree.
489503
const FilterChooser *Parent;
490504

491-
// Width of instructions
492-
unsigned BitWidth;
505+
/// Some targets (ARM) specify more encoding bits in Inst that Size allows.
506+
/// This field allows us to ignore the extra bits.
507+
unsigned MaxFilterWidth;
493508

494509
// Parent emitter
495510
const DecoderEmitter *Emitter;
@@ -501,28 +516,47 @@ class FilterChooser {
501516
};
502517

503518
public:
519+
/// Constructs a top-level filter chooser.
504520
FilterChooser(ArrayRef<InstructionEncoding> Encodings,
505-
ArrayRef<unsigned> EncodingIDs, unsigned BW,
521+
ArrayRef<unsigned> EncodingIDs, unsigned MaxFilterWidth,
506522
const DecoderEmitter *E)
507-
: Encodings(Encodings), EncodingIDs(EncodingIDs), FilterBits(BW),
508-
Parent(nullptr), BitWidth(BW), Emitter(E) {
523+
: Encodings(Encodings), EncodingIDs(EncodingIDs), Parent(nullptr),
524+
MaxFilterWidth(MaxFilterWidth), Emitter(E) {
525+
// Sort encoding IDs once.
526+
stable_sort(this->EncodingIDs, LessEncodingIDByWidth(Encodings));
527+
// Filter width is the width of the smallest encoding.
528+
unsigned FilterWidth = Encodings[this->EncodingIDs.front()].getBitWidth();
529+
// Cap it as necessary.
530+
FilterWidth = std::min(FilterWidth, MaxFilterWidth);
531+
FilterBits = KnownBits(FilterWidth);
509532
doFilter();
510533
}
511534

535+
/// Constructs an inferior filter chooser.
512536
FilterChooser(ArrayRef<InstructionEncoding> Encodings,
513-
ArrayRef<unsigned> EncodingIDs,
514-
const KnownBits &ParentFilterBits, const FilterChooser &parent)
515-
: Encodings(Encodings), EncodingIDs(EncodingIDs),
516-
FilterBits(ParentFilterBits), Parent(&parent),
517-
BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
537+
ArrayRef<unsigned> EncodingIDs, const KnownBits &FilterBits,
538+
const FilterChooser &Parent)
539+
: Encodings(Encodings), EncodingIDs(EncodingIDs), Parent(&Parent),
540+
MaxFilterWidth(Parent.MaxFilterWidth), Emitter(Parent.Emitter) {
541+
// Inferior filter choosers are created from sorted array of encoding IDs.
542+
assert(is_sorted(EncodingIDs, LessEncodingIDByWidth(Encodings)));
518543
assert(!FilterBits.hasConflict() && "Broken filter");
544+
// Filter width is the width of the smallest encoding.
545+
unsigned FilterWidth = Encodings[EncodingIDs.front()].getBitWidth();
546+
// Cap it as necessary.
547+
FilterWidth = std::min(FilterWidth, MaxFilterWidth);
548+
this->FilterBits = FilterBits.anyext(FilterWidth);
519549
doFilter();
520550
}
521551

522552
FilterChooser(const FilterChooser &) = delete;
523553
void operator=(const FilterChooser &) = delete;
524554

525-
unsigned getBitWidth() const { return BitWidth; }
555+
/// Returns the width of the largest encoding.
556+
unsigned getMaxEncodingWidth() const {
557+
// The last encoding ID is the ID of an encoding with the largest width.
558+
return Encodings[EncodingIDs.back()].getBitWidth();
559+
}
526560

527561
protected:
528562
KnownBits getMandatoryEncodingBits(unsigned EncodingID) const {
@@ -531,14 +565,12 @@ class FilterChooser {
531565
// Clear all bits that are allowed to change according to SoftFail mask.
532566
EncodingBits.Zero &= ~Encoding.getSoftFailBits();
533567
EncodingBits.One &= ~Encoding.getSoftFailBits();
534-
// Truncate or extend with unknown bits according to the filter bit width.
535-
// FIXME: We should stop doing this.
536-
return EncodingBits.anyextOrTrunc(BitWidth);
568+
return EncodingBits;
537569
}
538570

539571
/// dumpStack - dumpStack traverses the filter chooser chain and calls
540572
/// dumpFilterArray on each filter chooser up to the top level one.
541-
void dumpStack(raw_ostream &OS, indent Indent) const;
573+
void dumpStack(raw_ostream &OS, indent Indent, unsigned PadToWidth) const;
542574

543575
bool isPositionFiltered(unsigned Idx) const {
544576
return FilterBits.Zero[Idx] || FilterBits.One[Idx];
@@ -620,7 +652,7 @@ Filter::Filter(Filter &&f)
620652

621653
Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits)
622654
: Owner(owner), StartBit(startBit), NumBits(numBits) {
623-
assert(StartBit + NumBits - 1 < Owner.BitWidth);
655+
assert(StartBit + NumBits - 1 < Owner.FilterBits.getBitWidth());
624656

625657
for (unsigned EncodingID : Owner.EncodingIDs) {
626658
// Populates the insn given the uid.
@@ -1057,10 +1089,12 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
10571089

10581090
/// dumpStack - dumpStack traverses the filter chooser chain and calls
10591091
/// dumpFilterArray on each filter chooser up to the top level one.
1060-
void FilterChooser::dumpStack(raw_ostream &OS, indent Indent) const {
1092+
void FilterChooser::dumpStack(raw_ostream &OS, indent Indent,
1093+
unsigned PadToWidth) const {
10611094
if (Parent)
1062-
Parent->dumpStack(OS, Indent);
1063-
OS << Indent;
1095+
Parent->dumpStack(OS, Indent, PadToWidth);
1096+
assert(PadToWidth >= FilterBits.getBitWidth());
1097+
OS << Indent << indent(PadToWidth - FilterBits.getBitWidth());
10641098
printKnownBits(OS, FilterBits, '.');
10651099
OS << '\n';
10661100
}
@@ -1080,7 +1114,8 @@ FilterChooser::getIslands(const KnownBits &EncodingBits) const {
10801114
// 2: Island (well-known bit value needed for decoding)
10811115
unsigned State = 0;
10821116

1083-
for (unsigned i = 0; i < BitWidth; ++i) {
1117+
unsigned FilterWidth = FilterBits.getBitWidth();
1118+
for (unsigned i = 0; i != FilterWidth; ++i) {
10841119
bool IsKnown = EncodingBits.Zero[i] || EncodingBits.One[i];
10851120
bool Filtered = isPositionFiltered(i);
10861121
switch (State) {
@@ -1110,7 +1145,7 @@ FilterChooser::getIslands(const KnownBits &EncodingBits) const {
11101145
}
11111146
// If we are still in Island after the loop, do some housekeeping.
11121147
if (State == 2)
1113-
Islands.push_back({StartBit, BitWidth - StartBit, FieldVal});
1148+
Islands.push_back({StartBit, FilterWidth - StartBit, FieldVal});
11141149

11151150
return Islands;
11161151
}
@@ -1316,9 +1351,10 @@ void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
13161351
if (SFBits.isZero())
13171352
return;
13181353

1319-
APInt PositiveMask(BitWidth, 0ULL);
1320-
APInt NegativeMask(BitWidth, 0ULL);
1321-
for (unsigned i = 0; i < BitWidth; ++i) {
1354+
unsigned EncodingWidth = InstBits.getBitWidth();
1355+
APInt PositiveMask(EncodingWidth, 0);
1356+
APInt NegativeMask(EncodingWidth, 0);
1357+
for (unsigned i = 0; i != EncodingWidth; ++i) {
13221358
if (!SFBits[i])
13231359
continue;
13241360

@@ -1484,7 +1520,8 @@ bool FilterChooser::filterProcessor(ArrayRef<bitAttr_t> BitAttrs,
14841520
unsigned StartBit = 0;
14851521

14861522
std::vector<std::unique_ptr<Filter>> Filters;
1487-
for (unsigned BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
1523+
unsigned FilterWidth = FilterBits.getBitWidth();
1524+
for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex) {
14881525
bitAttr_t bitAttr = BitAttrs[BitIndex];
14891526

14901527
assert(bitAttr != ATTR_NONE && "Bit without attributes");
@@ -1565,12 +1602,12 @@ bool FilterChooser::filterProcessor(ArrayRef<bitAttr_t> BitAttrs,
15651602
case ATTR_FILTERED:
15661603
break;
15671604
case ATTR_ALL_SET:
1568-
reportRegion(Filters, RA, StartBit, BitWidth, AllowMixed);
1605+
reportRegion(Filters, RA, StartBit, FilterWidth, AllowMixed);
15691606
break;
15701607
case ATTR_ALL_UNSET:
15711608
break;
15721609
case ATTR_MIXED:
1573-
reportRegion(Filters, RA, StartBit, BitWidth, AllowMixed);
1610+
reportRegion(Filters, RA, StartBit, FilterWidth, AllowMixed);
15741611
break;
15751612
}
15761613

@@ -1628,18 +1665,19 @@ void FilterChooser::doFilter() {
16281665
// (MIXED) ------ . ----> (MIXED)
16291666
// (FILTERED)---- . ----> (FILTERED)
16301667

1631-
SmallVector<bitAttr_t, 128> BitAttrs(BitWidth, ATTR_NONE);
1668+
unsigned FilterWidth = FilterBits.getBitWidth();
1669+
SmallVector<bitAttr_t, 128> BitAttrs(FilterWidth, ATTR_NONE);
16321670

16331671
// FILTERED bit positions provide no entropy and are not worthy of pursuing.
16341672
// Filter::recurse() set either 1 or 0 for each position.
1635-
for (unsigned BitIndex = 0; BitIndex < BitWidth; ++BitIndex)
1673+
for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex)
16361674
if (isPositionFiltered(BitIndex))
16371675
BitAttrs[BitIndex] = ATTR_FILTERED;
16381676

16391677
for (unsigned EncodingID : EncodingIDs) {
16401678
KnownBits EncodingBits = getMandatoryEncodingBits(EncodingID);
16411679

1642-
for (unsigned BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
1680+
for (unsigned BitIndex = 0; BitIndex != FilterWidth; ++BitIndex) {
16431681
bool IsKnown = EncodingBits.Zero[BitIndex] || EncodingBits.One[BitIndex];
16441682
switch (BitAttrs[BitIndex]) {
16451683
case ATTR_NONE:
@@ -1688,12 +1726,14 @@ void FilterChooser::doFilter() {
16881726

16891727
// Dump filters.
16901728
indent Indent(4);
1691-
dumpStack(errs(), Indent);
1729+
// Helps to keep the output right-justified.
1730+
unsigned PadToWidth = getMaxEncodingWidth();
1731+
dumpStack(errs(), Indent, PadToWidth);
16921732

16931733
// Dump encodings.
16941734
for (unsigned EncodingID : EncodingIDs) {
16951735
const InstructionEncoding &Enc = Encodings[EncodingID];
1696-
errs() << Indent;
1736+
errs() << Indent << indent(PadToWidth - Enc.getBitWidth());
16971737
printKnownBits(errs(), getMandatoryEncodingBits(EncodingID), '_');
16981738
errs() << " " << Enc.getName() << '\n';
16991739
}

0 commit comments

Comments
 (0)