66//
77// ===----------------------------------------------------------------------===//
88//
9- // We wish to encode a SourceLocation that comes from another module file in a
10- // way that does not depend on that module file, so that source location
11- // changes in the other module file do not affect the contents of the current
12- // module file. Users then don't need to recompile the whole project because of
13- // a new line in a module unit at the root of the dependency graph.
9+ // Source locations are stored pervasively in the AST, making up a third of
10+ // the size of typical serialized files. Storing them efficiently is important.
1411//
15- // To achieve this, we need to encode the index of the module file into the
16- // encoding of the source location. The encoding of the source location may be:
12+ // We use integers optimized by VBR-encoding, because:
13+ // - when abbreviations cannot be used, VBR6 encoding is our only choice
14+ // - in the worst case a SourceLocation can be ~any 32-bit number, but in
15+ // practice they are highly predictable
1716//
18- //      |-----------------------|-----------------------|
19- //      |           A           |         B         | C |
20- //
21- // * A: 32 bit. The index of the module file in the module manager + 1. The +1
22- // here is necessary since we want 0 to stand for the current module file.
23- // * B: 31 bit. The offset of the source location relative to the module file
24- // containing it.
25- // * C: The macro bit. We rotate it to the lowest bit so that we can save some
26- // space in case the index of the module file is 0.
27- //
28- // As a special case, if the index of the module file is 0, we allow encoding
29- // a sequence of locations by storing only differences between successive elements.
17+ // We encode the integer so that likely values encode as small numbers that
18+ // turn into few VBR chunks:
19+ // - the invalid sentinel location is a very common value: it encodes as 0
20+ // - the "macro or not" bit is stored at the bottom of the integer
21+ // (rather than at the top, as in memory), so macro locations can have
22+ // small representations.
23+ // - related locations (e.g. of a left and right paren pair) are usually
24+ // similar, so when encoding a sequence of locations we store only
25+ // differences between successive elements.
3026//
3127// ===----------------------------------------------------------------------===//
3228
33- #include " clang/Basic/SourceLocation.h"
34- #include " llvm/Support/MathExtras.h"
3529#include < climits>
30+ #include " clang/Basic/SourceLocation.h"
3631
3732#ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
3833#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
@@ -57,13 +52,9 @@ class SourceLocationEncoding {
5752 friend SourceLocationSequence;
5853
5954public:
60- using RawLocEncoding = uint64_t ;
61-
62- static RawLocEncoding encode (SourceLocation Loc, UIntTy BaseOffset,
63- unsigned BaseModuleFileIndex,
64- SourceLocationSequence * = nullptr );
65- static std::pair<SourceLocation, unsigned >
66- decode (RawLocEncoding, SourceLocationSequence * = nullptr );
55+ static uint64_t encode (SourceLocation Loc,
56+ SourceLocationSequence * = nullptr );
57+ static SourceLocation decode (uint64_t , SourceLocationSequence * = nullptr );
6758};
6859
6960/// Serialized encoding of a sequence of SourceLocations.
@@ -158,44 +149,14 @@ class SourceLocationSequence::State {
158149 operator SourceLocationSequence *() { return &Seq; }
159150};
160151
161- inline SourceLocationEncoding::RawLocEncoding
162- SourceLocationEncoding::encode (SourceLocation Loc, UIntTy BaseOffset,
163- unsigned BaseModuleFileIndex,
164- SourceLocationSequence *Seq) {
165- // If the source location is a local source location, we can try to optimize
166- // the similar sequences to only record the differences.
167- if (!BaseOffset)
168- return Seq ? Seq->encode (Loc) : encodeRaw (Loc.getRawEncoding ());
169-
170- if (Loc.isInvalid ())
171- return 0 ;
172-
173- // Otherwise, the higher bits are used to store the module file index,
174- // so it is meaningless to optimize the source locations into small
175- // integers. Let's try to always use the raw encodings.
176- assert (Loc.getOffset () >= BaseOffset);
177- Loc = Loc.getLocWithOffset (-BaseOffset);
178- RawLocEncoding Encoded = encodeRaw (Loc.getRawEncoding ());
179-
180- // 16 bits should be sufficient to store the module file index.
181- assert (BaseModuleFileIndex < (1 << 16 ));
182- Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32 ;
183- return Encoded;
152+ inline uint64_t SourceLocationEncoding::encode (SourceLocation Loc,
153+ SourceLocationSequence *Seq) {
154+ return Seq ? Seq->encode (Loc) : encodeRaw (Loc.getRawEncoding ());
184155}
185- inline std::pair<SourceLocation, unsigned >
186- SourceLocationEncoding::decode (RawLocEncoding Encoded,
187- SourceLocationSequence *Seq) {
188- unsigned ModuleFileIndex = Encoded >> 32 ;
189-
190- if (!ModuleFileIndex)
191- return {Seq ? Seq->decode (Encoded)
192- : SourceLocation::getFromRawEncoding (decodeRaw (Encoded)),
193- ModuleFileIndex};
194-
195- Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32 );
196- SourceLocation Loc = SourceLocation::getFromRawEncoding (decodeRaw (Encoded));
197-
198- return {Loc, ModuleFileIndex};
156+ inline SourceLocation
157+ SourceLocationEncoding::decode (uint64_t Encoded, SourceLocationSequence *Seq) {
158+ return Seq ? Seq->decode (Encoded)
159+ : SourceLocation::getFromRawEncoding (decodeRaw (Encoded));
199160}
200161
201162} // namespace clang
0 commit comments