Skip to content

Commit 6cd71d3

Browse files
committed
[Serialization] Remove delta encoding optimization (llvm#145670)
See the discussion in llvm#145529. This slightly increases the PCM size (~5%). Some data points (in-memory preamble size in clangd): - SemaExpr.cpp: 77MB -> 80MB - FindTarget.cpp: 71MB -> 75MB
1 parent 2c44023 commit 6cd71d3

File tree

8 files changed

+53
-238
lines changed

8 files changed

+53
-238
lines changed

clang/include/clang/Serialization/ASTReader.h

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -446,8 +446,6 @@ class ASTReader
446446
using ModuleReverseIterator = ModuleManager::ModuleReverseIterator;
447447

448448
private:
449-
using LocSeq = SourceLocationSequence;
450-
451449
/// The receiver of some callbacks invoked by ASTReader.
452450
std::unique_ptr<ASTReaderListener> Listener;
453451

@@ -2426,18 +2424,16 @@ class ASTReader
24262424
/// Read a source location from raw form and return it in its
24272425
/// originating module file's source location space.
24282426
std::pair<SourceLocation, unsigned>
2429-
ReadUntranslatedSourceLocation(RawLocEncoding Raw,
2430-
LocSeq *Seq = nullptr) const {
2431-
return SourceLocationEncoding::decode(Raw, Seq);
2427+
ReadUntranslatedSourceLocation(RawLocEncoding Raw) const {
2428+
return SourceLocationEncoding::decode(Raw);
24322429
}
24332430

24342431
/// Read a source location from raw form.
2435-
SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw,
2436-
LocSeq *Seq = nullptr) const {
2432+
SourceLocation ReadSourceLocation(ModuleFile &MF, RawLocEncoding Raw) const {
24372433
if (!MF.ModuleOffsetMap.empty())
24382434
ReadModuleOffsetMap(MF);
24392435

2440-
auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw, Seq);
2436+
auto [Loc, ModuleFileIndex] = ReadUntranslatedSourceLocation(Raw);
24412437
ModuleFile *OwningModuleFile =
24422438
ModuleFileIndex == 0 ? &MF : MF.TransitiveImports[ModuleFileIndex - 1];
24432439

@@ -2465,9 +2461,9 @@ class ASTReader
24652461

24662462
/// Read a source location.
24672463
SourceLocation ReadSourceLocation(ModuleFile &ModuleFile,
2468-
const RecordDataImpl &Record, unsigned &Idx,
2469-
LocSeq *Seq = nullptr) {
2470-
return ReadSourceLocation(ModuleFile, Record[Idx++], Seq);
2464+
const RecordDataImpl &Record,
2465+
unsigned &Idx) {
2466+
return ReadSourceLocation(ModuleFile, Record[Idx++]);
24712467
}
24722468

24732469
/// Read a FileID.
@@ -2486,7 +2482,7 @@ class ASTReader
24862482

24872483
/// Read a source range.
24882484
SourceRange ReadSourceRange(ModuleFile &F, const RecordData &Record,
2489-
unsigned &Idx, LocSeq *Seq = nullptr);
2485+
unsigned &Idx);
24902486

24912487
static llvm::BitVector ReadBitVector(const RecordData &Record,
24922488
const StringRef Blob);

clang/include/clang/Serialization/ASTRecordReader.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ class OMPChildren;
3232
class ASTRecordReader
3333
: public serialization::DataStreamBasicReader<ASTRecordReader> {
3434
using ModuleFile = serialization::ModuleFile;
35-
using LocSeq = SourceLocationSequence;
3635

3736
ASTReader *Reader;
3837
ModuleFile *F;
@@ -160,7 +159,7 @@ class ASTRecordReader
160159
TypeSourceInfo *readTypeSourceInfo();
161160

162161
/// Reads the location information for a type.
163-
void readTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr);
162+
void readTypeLoc(TypeLoc TL);
164163

165164
/// Map a local type ID within a given AST file to a global type ID.
166165
serialization::TypeID getGlobalTypeID(serialization::TypeID LocalID) const {
@@ -287,13 +286,13 @@ class ASTRecordReader
287286
void readOpenACCRoutineDeclAttr(OpenACCRoutineDeclAttr *A);
288287

289288
/// Read a source location, advancing Idx.
290-
SourceLocation readSourceLocation(LocSeq *Seq = nullptr) {
291-
return Reader->ReadSourceLocation(*F, Record, Idx, Seq);
289+
SourceLocation readSourceLocation() {
290+
return Reader->ReadSourceLocation(*F, Record, Idx);
292291
}
293292

294293
/// Read a source range, advancing Idx.
295-
SourceRange readSourceRange(LocSeq *Seq = nullptr) {
296-
return Reader->ReadSourceRange(*F, Record, Idx, Seq);
294+
SourceRange readSourceRange() {
295+
return Reader->ReadSourceRange(*F, Record, Idx);
297296
}
298297

299298
/// Read an arbitrary constant value, advancing Idx.

clang/include/clang/Serialization/ASTRecordWriter.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ class TypeLoc;
2828
/// An object for streaming information to a record.
2929
class ASTRecordWriter
3030
: public serialization::DataStreamBasicWriter<ASTRecordWriter> {
31-
using LocSeq = SourceLocationSequence;
3231

3332
ASTWriter *Writer;
3433
ASTWriter::RecordDataImpl *Record;
@@ -139,8 +138,8 @@ class ASTRecordWriter
139138
void AddFunctionDefinition(const FunctionDecl *FD);
140139

141140
/// Emit a source location.
142-
void AddSourceLocation(SourceLocation Loc, LocSeq *Seq = nullptr) {
143-
return Writer->AddSourceLocation(Loc, *Record, Seq);
141+
void AddSourceLocation(SourceLocation Loc) {
142+
return Writer->AddSourceLocation(Loc, *Record);
144143
}
145144
void writeSourceLocation(SourceLocation Loc) {
146145
AddSourceLocation(Loc);
@@ -166,8 +165,8 @@ class ASTRecordWriter
166165
}
167166

168167
/// Emit a source range.
169-
void AddSourceRange(SourceRange Range, LocSeq *Seq = nullptr) {
170-
return Writer->AddSourceRange(Range, *Record, Seq);
168+
void AddSourceRange(SourceRange Range) {
169+
return Writer->AddSourceRange(Range, *Record);
171170
}
172171

173172
void writeBool(bool Value) {
@@ -237,7 +236,7 @@ class ASTRecordWriter
237236
void AddTypeSourceInfo(TypeSourceInfo *TInfo);
238237

239238
/// Emits source location information for a type. Does not emit the type.
240-
void AddTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr);
239+
void AddTypeLoc(TypeLoc TL);
241240

242241
/// Emits a template argument location info.
243242
void AddTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind,

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,6 @@ class ASTWriter : public ASTDeserializationListener,
112112
using TypeIdxMap = llvm::DenseMap<QualType, serialization::TypeIdx,
113113
serialization::UnsafeQualTypeDenseMapInfo>;
114114

115-
using LocSeq = SourceLocationSequence;
116-
117115
/// The bitstream writer used to emit this precompiled header.
118116
llvm::BitstreamWriter &Stream;
119117

@@ -732,16 +730,14 @@ class ASTWriter : public ASTDeserializationListener,
732730
void AddFileID(FileID FID, RecordDataImpl &Record);
733731

734732
/// Emit a source location.
735-
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record,
736-
LocSeq *Seq = nullptr);
733+
void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record);
737734

738735
/// Return the raw encodings for source locations.
739736
SourceLocationEncoding::RawLocEncoding
740-
getRawSourceLocationEncoding(SourceLocation Loc, LocSeq *Seq = nullptr);
737+
getRawSourceLocationEncoding(SourceLocation Loc);
741738

742739
/// Emit a source range.
743-
void AddSourceRange(SourceRange Range, RecordDataImpl &Record,
744-
LocSeq *Seq = nullptr);
740+
void AddSourceRange(SourceRange Range, RecordDataImpl &Record);
745741

746742
/// Emit a reference to an identifier.
747743
void AddIdentifierRef(const IdentifierInfo *II, RecordDataImpl &Record);

clang/include/clang/Serialization/SourceLocationEncoding.h

Lines changed: 6 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
// * C: The macro bit. We rotate it to the lowest bit so that we can save some
2626
// space in case the index of the module file is 0.
2727
//
28-
// Specially, if the index of the module file is 0, we allow to encode a
29-
// sequence of locations we store only differences between successive elements.
3028
//
3129
//===----------------------------------------------------------------------===//
3230

@@ -38,7 +36,6 @@
3836
#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
3937

4038
namespace clang {
41-
class SourceLocationSequence;
4239

4340
/// Serialized encoding of SourceLocations without context.
4441
/// Optimized to have small unsigned values (=> small after VBR encoding).
@@ -54,119 +51,22 @@ class SourceLocationEncoding {
5451
static UIntTy decodeRaw(UIntTy Raw) {
5552
return (Raw >> 1) | (Raw << (UIntBits - 1));
5653
}
57-
friend SourceLocationSequence;
5854

5955
public:
6056
using RawLocEncoding = uint64_t;
6157

6258
static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
63-
unsigned BaseModuleFileIndex,
64-
SourceLocationSequence * = nullptr);
65-
static std::pair<SourceLocation, unsigned>
66-
decode(RawLocEncoding, SourceLocationSequence * = nullptr);
67-
};
68-
69-
/// Serialized encoding of a sequence of SourceLocations.
70-
///
71-
/// Optimized to produce small values when locations with the sequence are
72-
/// similar. Each element can be delta-encoded against the last nonzero element.
73-
///
74-
/// Sequences should be started by creating a SourceLocationSequence::State,
75-
/// and then passed around as SourceLocationSequence*. Example:
76-
///
77-
/// // establishes a sequence
78-
/// void EmitTopLevelThing() {
79-
/// SourceLocationSequence::State Seq;
80-
/// EmitContainedThing(Seq);
81-
/// EmitRecursiveThing(Seq);
82-
/// }
83-
///
84-
/// // optionally part of a sequence
85-
/// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
86-
/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
87-
/// }
88-
///
89-
/// // establishes a sequence if there isn't one already
90-
/// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
91-
/// SourceLocationSequence::State Seq(ParentSeq);
92-
/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
93-
/// EmitRecursiveThing(Seq);
94-
/// }
95-
///
96-
class SourceLocationSequence {
97-
using UIntTy = SourceLocation::UIntTy;
98-
using EncodedTy = uint64_t;
99-
constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
100-
static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
101-
102-
// Prev stores the rotated last nonzero location.
103-
UIntTy &Prev;
104-
105-
// Zig-zag encoding turns small signed integers into small unsigned integers.
106-
// 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
107-
static UIntTy zigZag(UIntTy V) {
108-
UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
109-
return Sign ^ (V << 1);
110-
}
111-
static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
112-
113-
SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
114-
115-
EncodedTy encodeRaw(UIntTy Raw) {
116-
if (Raw == 0)
117-
return 0;
118-
UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
119-
if (Prev == 0)
120-
return Prev = Rotated;
121-
UIntTy Delta = Rotated - Prev;
122-
Prev = Rotated;
123-
// Exactly one 33 bit value is possible! (1 << 32).
124-
// This is because we have two representations of zero: trivial & relative.
125-
return 1 + EncodedTy{zigZag(Delta)};
126-
}
127-
UIntTy decodeRaw(EncodedTy Encoded) {
128-
if (Encoded == 0)
129-
return 0;
130-
if (Prev == 0)
131-
return SourceLocationEncoding::decodeRaw(Prev = Encoded);
132-
return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
133-
}
134-
135-
public:
136-
SourceLocation decode(EncodedTy Encoded) {
137-
return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
138-
}
139-
EncodedTy encode(SourceLocation Loc) {
140-
return encodeRaw(Loc.getRawEncoding());
141-
}
142-
143-
class State;
144-
};
145-
146-
/// This object establishes a SourceLocationSequence.
147-
class SourceLocationSequence::State {
148-
UIntTy Prev = 0;
149-
SourceLocationSequence Seq;
150-
151-
public:
152-
// If Parent is provided and non-null, then this root becomes part of that
153-
// enclosing sequence instead of establishing a new one.
154-
State(SourceLocationSequence *Parent = nullptr)
155-
: Seq(Parent ? Parent->Prev : Prev) {}
156-
157-
// Implicit conversion for uniform use of roots vs propagated sequences.
158-
operator SourceLocationSequence *() { return &Seq; }
59+
unsigned BaseModuleFileIndex);
60+
static std::pair<SourceLocation, unsigned> decode(RawLocEncoding);
15961
};
16062

16163
inline SourceLocationEncoding::RawLocEncoding
16264
SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
163-
unsigned BaseModuleFileIndex,
164-
SourceLocationSequence *Seq) {
65+
unsigned BaseModuleFileIndex) {
16566
// If the source location is a local source location (no base offset), its
16667
// raw encoding is stored directly; no delta against earlier locations is used.
16768
if (!BaseOffset)
168-
return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
169-
69+
return encodeRaw(Loc.getRawEncoding());
17070
if (Loc.isInvalid())
17171
return 0;
17272

@@ -183,13 +83,11 @@ SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
18383
return Encoded;
18484
}
18585
inline std::pair<SourceLocation, unsigned>
186-
SourceLocationEncoding::decode(RawLocEncoding Encoded,
187-
SourceLocationSequence *Seq) {
86+
SourceLocationEncoding::decode(RawLocEncoding Encoded) {
18887
unsigned ModuleFileIndex = Encoded >> 32;
18988

19089
if (!ModuleFileIndex)
191-
return {Seq ? Seq->decode(Encoded)
192-
: SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
90+
return {SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
19391
ModuleFileIndex};
19492

19593
Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);

clang/lib/Serialization/ASTReader.cpp

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1914,10 +1914,9 @@ bool ASTReader::ReadSLocEntry(int ID) {
19141914
}
19151915

19161916
case SM_SLOC_EXPANSION_ENTRY: {
1917-
LocSeq::State Seq;
1918-
SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1], Seq);
1919-
SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2], Seq);
1920-
SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3], Seq);
1917+
SourceLocation SpellingLoc = ReadSourceLocation(*F, Record[1]);
1918+
SourceLocation ExpansionBegin = ReadSourceLocation(*F, Record[2]);
1919+
SourceLocation ExpansionEnd = ReadSourceLocation(*F, Record[3]);
19211920
SourceMgr.createExpansionLoc(SpellingLoc, ExpansionBegin, ExpansionEnd,
19221921
Record[5], Record[4], ID,
19231922
BaseOffset + Record[0]);
@@ -7072,13 +7071,10 @@ QualType ASTReader::readTypeRecord(TypeID ID) {
70727071
namespace clang {
70737072

70747073
class TypeLocReader : public TypeLocVisitor<TypeLocReader> {
7075-
using LocSeq = SourceLocationSequence;
7076-
70777074
ASTRecordReader &Reader;
7078-
LocSeq *Seq;
70797075

7080-
SourceLocation readSourceLocation() { return Reader.readSourceLocation(Seq); }
7081-
SourceRange readSourceRange() { return Reader.readSourceRange(Seq); }
7076+
SourceLocation readSourceLocation() { return Reader.readSourceLocation(); }
7077+
SourceRange readSourceRange() { return Reader.readSourceRange(); }
70827078

70837079
TypeSourceInfo *GetTypeSourceInfo() {
70847080
return Reader.readTypeSourceInfo();
@@ -7093,8 +7089,7 @@ class TypeLocReader : public TypeLocVisitor<TypeLocReader> {
70937089
}
70947090

70957091
public:
7096-
TypeLocReader(ASTRecordReader &Reader, LocSeq *Seq)
7097-
: Reader(Reader), Seq(Seq) {}
7092+
TypeLocReader(ASTRecordReader &Reader) : Reader(Reader) {}
70987093

70997094
// We want compile-time assurance that we've enumerated all of
71007095
// these, so unfortunately we have to declare them first, then
@@ -7458,9 +7453,8 @@ void TypeLocReader::VisitDependentBitIntTypeLoc(
74587453
TL.setNameLoc(readSourceLocation());
74597454
}
74607455

7461-
void ASTRecordReader::readTypeLoc(TypeLoc TL, LocSeq *ParentSeq) {
7462-
LocSeq::State Seq(ParentSeq);
7463-
TypeLocReader TLR(*this, Seq);
7456+
void ASTRecordReader::readTypeLoc(TypeLoc TL) {
7457+
TypeLocReader TLR(*this);
74647458
for (; !TL.isNull(); TL = TL.getNextTypeLoc())
74657459
TLR.Visit(TL);
74667460
}
@@ -10016,9 +10010,9 @@ ASTRecordReader::readNestedNameSpecifierLoc() {
1001610010
}
1001710011

1001810012
SourceRange ASTReader::ReadSourceRange(ModuleFile &F, const RecordData &Record,
10019-
unsigned &Idx, LocSeq *Seq) {
10020-
SourceLocation beg = ReadSourceLocation(F, Record, Idx, Seq);
10021-
SourceLocation end = ReadSourceLocation(F, Record, Idx, Seq);
10013+
unsigned &Idx) {
10014+
SourceLocation beg = ReadSourceLocation(F, Record, Idx);
10015+
SourceLocation end = ReadSourceLocation(F, Record, Idx);
1002210016
return SourceRange(beg, end);
1002310017
}
1002410018

0 commit comments

Comments
 (0)