Skip to content

Commit 37ed49e

Browse files
committed
Implement ordering of source locations that accounts for macro expansion
SourceManager's `isBeforeInBuffer` in buffer takes two source locations that are assumed to be within the same source buffer and determines whether the first precedes the second. However, due to macro expansion, there can be source locations in different source buffers that are nonetheless conceptually part of the same source file. For such cases, `isBeforeInBuffer` will silently produce wrong results. Extend `SourceManager` with a new `isBefore` operation that determines whether one source location precedes another in the same conceptual source file, even if they are in different source buffers that (e.g.) represent macro expansions within that source file. Also introduce `isAtOrBefore` to cover the case where the source locations might be equivalent, which was previously handled via a separate equality check on the source locations that won't work with macro expansions. Add caching for the list of accessors of a buffer, so we aren't doing a full walk up the source-buffer chain ever time we do a comparison of source locations. LCA is still linear-time, but this eliminates extraneous hash table lookups along the way. Over time, we should both move more clients over to these new variants and introduce more assertions into the old (in-buffer) versions to catch improper uses of them.
1 parent c060a90 commit 37ed49e

File tree

3 files changed

+142
-65
lines changed

3 files changed

+142
-65
lines changed

include/swift/Basic/SourceManager.h

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "swift/Basic/FileSystem.h"
1717
#include "swift/Basic/SourceLoc.h"
1818
#include "clang/Basic/FileManager.h"
19+
#include "llvm/ADT/ArrayRef.h"
1920
#include "llvm/ADT/DenseSet.h"
2021
#include "llvm/ADT/None.h"
2122
#include "llvm/ADT/Optional.h"
@@ -69,6 +70,10 @@ class GeneratedSourceInfo {
6970
/// The name of the source file on disk that was created to hold the
7071
/// contents of this file for external clients.
7172
StringRef onDiskBufferCopyFileName = StringRef();
73+
74+
/// Contains the ancestors of this source buffer, starting with the root source
75+
/// buffer and ending at this source buffer.
76+
mutable llvm::ArrayRef<unsigned> ancestors = llvm::ArrayRef<unsigned>();
7277
};
7378

7479
/// This class manages and owns source buffers.
@@ -205,6 +210,15 @@ class SourceManager {
205210
llvm::Optional<GeneratedSourceInfo>
206211
getGeneratedSourceInfo(unsigned bufferID) const;
207212

213+
/// Retrieve the list of ancestors of the given source buffer, starting with
214+
/// the root buffer and proceding to the given buffer ID at the end.
215+
///
216+
/// The scratch parameter will be used to avoid allocation in the case where
217+
/// the given buffer ID is the top-level buffer, in which case bufferID will
218+
/// be written into scratch at the returned array will contain that one
219+
/// element.
220+
ArrayRef<unsigned> getAncestors(unsigned bufferID, unsigned &scratch) const;
221+
208222
/// Record the starting source location of a regex literal.
209223
void recordRegexLiteralStartLoc(SourceLoc loc) {
210224
RegexLiteralStartLocs.insert(loc);
@@ -216,13 +230,35 @@ class SourceManager {
216230
return RegexLiteralStartLocs.contains(loc);
217231
}
218232

219-
/// Returns true if \c LHS is before \c RHS in the source buffer.
233+
/// Returns true if \c first is before \c second in the same source file,
234+
/// accounting for the possibility that first and second are in different
235+
/// source buffers that are conceptually within the same source file,
236+
/// due to macro expansion.
237+
bool isBefore(SourceLoc first, SourceLoc second) const;
238+
239+
/// Returns true if \c first is at or before \c second in the same source
240+
/// file, accounting for the possibility that first and second are in
241+
/// different source buffers that are conceptually within the same source
242+
/// file, due to macro expansion.
243+
bool isAtOrBefore(SourceLoc first, SourceLoc second) const;
244+
245+
/// Returns true if \c LHS is before \c RHS in the same source buffer.
220246
bool isBeforeInBuffer(SourceLoc LHS, SourceLoc RHS) const {
221247
return LHS.Value.getPointer() < RHS.Value.getPointer();
222248
}
223249

224-
/// Returns true if range \c R contains the location \c Loc. The location
225-
/// \c Loc should point at the beginning of the token.
250+
/// Returns true if \c range contains the location \c loc. The location
251+
/// \c loc should point at the beginning of the token.
252+
///
253+
/// This function accounts for the possibility that the source locations
254+
/// provided might come from different source buffers that are conceptually
255+
/// part of the same source file, for example due to macro expansion.
256+
bool containsTokenLoc(SourceRange range, SourceLoc loc) const;
257+
258+
/// Returns true if range \c R contains the location \c Loc, where all
259+
/// locations are known to be in the same source buffer.
260+
///
261+
/// The location \c Loc should point at the beginning of the token.
226262
bool rangeContainsTokenLoc(SourceRange R, SourceLoc Loc) const {
227263
return Loc == R.Start || Loc == R.End ||
228264
(isBeforeInBuffer(R.Start, Loc) && isBeforeInBuffer(Loc, R.End));

lib/AST/ASTScopeLookup.cpp

Lines changed: 3 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -109,63 +109,6 @@ static SourceLoc translateLocForReplacedRange(SourceManager &sourceMgr,
109109
return loc;
110110
}
111111

112-
/// Populate the ancestors list for this buffer, with the root source buffer
113-
/// at the beginning and the given source buffer at the end.
114-
static void populateAncestors(
115-
SourceManager &sourceMgr, unsigned bufferID,
116-
SmallVectorImpl<unsigned> &ancestors) {
117-
if (auto info = sourceMgr.getGeneratedSourceInfo(bufferID)) {
118-
auto ancestorLoc = info->originalSourceRange.getStart();
119-
if (ancestorLoc.isValid()) {
120-
auto ancestorBufferID = sourceMgr.findBufferContainingLoc(ancestorLoc);
121-
populateAncestors(sourceMgr, ancestorBufferID, ancestors);
122-
}
123-
}
124-
125-
ancestors.push_back(bufferID);
126-
}
127-
128-
/// Determine whether the first source location precedes the second, accounting
129-
/// for macro expansions.
130-
static bool isBeforeInSource(
131-
SourceManager &sourceMgr, SourceLoc firstLoc, SourceLoc secondLoc,
132-
bool allowEqual) {
133-
// If the two locations are in the same source buffer, compare their pointers.
134-
unsigned firstBufferID = sourceMgr.findBufferContainingLoc(firstLoc);
135-
unsigned secondBufferID = sourceMgr.findBufferContainingLoc(secondLoc);
136-
if (firstBufferID == secondBufferID) {
137-
return sourceMgr.isBeforeInBuffer(firstLoc, secondLoc) ||
138-
(allowEqual && firstLoc == secondLoc);
139-
}
140-
141-
// If the two locations are in different source buffers, we need to compute
142-
// the least common ancestor.
143-
SmallVector<unsigned, 4> firstAncestors;
144-
populateAncestors(sourceMgr, firstBufferID, firstAncestors);
145-
SmallVector<unsigned, 4> secondAncestors;
146-
populateAncestors(sourceMgr, secondBufferID, secondAncestors);
147-
148-
// Find the first mismatch between the two ancestor lists; this is the
149-
// point of divergence.
150-
auto [firstMismatch, secondMismatch] = std::mismatch(
151-
firstAncestors.begin(), firstAncestors.end(),
152-
secondAncestors.begin(), secondAncestors.end());
153-
assert(firstMismatch != firstAncestors.begin() &&
154-
secondMismatch != secondAncestors.begin() &&
155-
"Ancestors don't have the same root source file");
156-
157-
SourceLoc firstLocInLCA = firstMismatch == firstAncestors.end()
158-
? firstLoc
159-
: sourceMgr.getGeneratedSourceInfo(*firstMismatch)
160-
->originalSourceRange.getStart();
161-
SourceLoc secondLocInLCA = secondMismatch == secondAncestors.end()
162-
? secondLoc
163-
: sourceMgr.getGeneratedSourceInfo(*secondMismatch)
164-
->originalSourceRange.getStart();
165-
return sourceMgr.isBeforeInBuffer(firstLocInLCA, secondLocInLCA) ||
166-
(allowEqual && firstLocInLCA == secondLocInLCA);
167-
}
168-
169112
NullablePtr<ASTScopeImpl>
170113
ASTScopeImpl::findChildContaining(ModuleDecl *parentModule,
171114
SourceLoc loc,
@@ -182,17 +125,15 @@ ASTScopeImpl::findChildContaining(ModuleDecl *parentModule,
182125
Lexer::getLocForEndOfToken(sourceMgr, rangeOfScope.End);
183126

184127
loc = translateLocForReplacedRange(sourceMgr, rangeOfScope.Start, loc);
185-
return isBeforeInSource(
186-
sourceMgr, endOfScope, loc, /*allowEqual=*/true);
128+
return sourceMgr.isAtOrBefore(endOfScope, loc);
187129
});
188130

189131
if (child != getChildren().end()) {
190132
auto rangeOfScope = (*child)->getSourceRangeOfThisASTNode();
191133
auto endOfScope = Lexer::getLocForEndOfToken(sourceMgr, rangeOfScope.End);
192134
loc = translateLocForReplacedRange(sourceMgr, rangeOfScope.Start, loc);
193-
if (isBeforeInSource(sourceMgr, rangeOfScope.Start, loc,
194-
/*allowEqual=*/true) &&
195-
isBeforeInSource(sourceMgr, loc, endOfScope, /*allowEqual=*/false))
135+
if (sourceMgr.isAtOrBefore(rangeOfScope.Start, loc) &&
136+
sourceMgr.isBefore(loc, endOfScope))
196137
return *child;
197138
}
198139

lib/Basic/SourceLoc.cpp

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,10 @@ SourceManager::getIDForBufferIdentifier(StringRef BufIdentifier) const {
216216
SourceManager::~SourceManager() {
217217
for (auto &generated : GeneratedSourceInfos) {
218218
free((void*)generated.second.onDiskBufferCopyFileName.data());
219+
220+
if (generated.second.ancestors.size() > 0) {
221+
delete [] generated.second.ancestors.data();
222+
}
219223
}
220224
}
221225

@@ -717,3 +721,99 @@ SourceManager::getLocForForeignLoc(SourceLoc otherLoc,
717721

718722
return SourceLoc();
719723
}
724+
725+
/// Populate the ancestors list for this buffer, with the root source buffer
726+
/// at the beginning and the given source buffer at the end.
727+
static void populateAncestors(
728+
const SourceManager &sourceMgr, unsigned bufferID,
729+
SmallVectorImpl<unsigned> &ancestors) {
730+
if (auto info = sourceMgr.getGeneratedSourceInfo(bufferID)) {
731+
auto ancestorLoc = info->originalSourceRange.getStart();
732+
if (ancestorLoc.isValid()) {
733+
auto ancestorBufferID = sourceMgr.findBufferContainingLoc(ancestorLoc);
734+
populateAncestors(sourceMgr, ancestorBufferID, ancestors);
735+
}
736+
}
737+
738+
ancestors.push_back(bufferID);
739+
}
740+
741+
ArrayRef<unsigned> SourceManager::getAncestors(
742+
unsigned bufferID, unsigned &scratch
743+
) const {
744+
// If there is no generated source information for this buffer, then this is
745+
// the only buffer here. Avoid memory allocation by using the scratch space
746+
// we were given.
747+
auto knownInfo = GeneratedSourceInfos.find(bufferID);
748+
if (knownInfo == GeneratedSourceInfos.end()) {
749+
scratch = bufferID;
750+
return ArrayRef<unsigned>(&scratch, 1);
751+
}
752+
753+
// If we already have the ancestors cached, use them.
754+
if (!knownInfo->second.ancestors.empty())
755+
return knownInfo->second.ancestors;
756+
757+
// Compute all of the ancestors. We only do this once for a given buffer.
758+
SmallVector<unsigned, 4> ancestors;
759+
populateAncestors(*this, bufferID, ancestors);
760+
761+
// Cache the ancestors in the generated source info record.
762+
unsigned *ancestorsPtr = new unsigned [ancestors.size()];
763+
std::copy(ancestors.begin(), ancestors.end(), ancestorsPtr);
764+
knownInfo->second.ancestors = llvm::makeArrayRef(ancestorsPtr, ancestors.size());
765+
return knownInfo->second.ancestors;
766+
}
767+
768+
769+
/// Determine whether the first source location precedes the second, accounting
770+
/// for macro expansions.
771+
static bool isBeforeInSource(
772+
const SourceManager &sourceMgr, SourceLoc firstLoc, SourceLoc secondLoc,
773+
bool allowEqual) {
774+
// If the two locations are in the same source buffer, compare their pointers.
775+
unsigned firstBufferID = sourceMgr.findBufferContainingLoc(firstLoc);
776+
unsigned secondBufferID = sourceMgr.findBufferContainingLoc(secondLoc);
777+
if (firstBufferID == secondBufferID) {
778+
return sourceMgr.isBeforeInBuffer(firstLoc, secondLoc) ||
779+
(allowEqual && firstLoc == secondLoc);
780+
}
781+
782+
// If the two locations are in different source buffers, we need to compute
783+
// the least common ancestor.
784+
unsigned firstScratch, secondScratch;
785+
auto firstAncestors = sourceMgr.getAncestors(firstBufferID, firstScratch);
786+
auto secondAncestors = sourceMgr.getAncestors(secondBufferID, secondScratch);
787+
788+
// Find the first mismatch between the two ancestor lists; this is the
789+
// point of divergence.
790+
auto [firstMismatch, secondMismatch] = std::mismatch(
791+
firstAncestors.begin(), firstAncestors.end(),
792+
secondAncestors.begin(), secondAncestors.end());
793+
assert(firstMismatch != firstAncestors.begin() &&
794+
secondMismatch != secondAncestors.begin() &&
795+
"Ancestors don't have the same root source file");
796+
797+
SourceLoc firstLocInLCA = firstMismatch == firstAncestors.end()
798+
? firstLoc
799+
: sourceMgr.getGeneratedSourceInfo(*firstMismatch)
800+
->originalSourceRange.getStart();
801+
SourceLoc secondLocInLCA = secondMismatch == secondAncestors.end()
802+
? secondLoc
803+
: sourceMgr.getGeneratedSourceInfo(*secondMismatch)
804+
->originalSourceRange.getStart();
805+
return sourceMgr.isBeforeInBuffer(firstLocInLCA, secondLocInLCA) ||
806+
(allowEqual && firstLocInLCA == secondLocInLCA);
807+
}
808+
809+
bool SourceManager::isBefore(SourceLoc first, SourceLoc second) const {
810+
return isBeforeInSource(*this, first, second, /*allowEqual=*/false);
811+
}
812+
813+
bool SourceManager::isAtOrBefore(SourceLoc first, SourceLoc second) const {
814+
return isBeforeInSource(*this, first, second, /*allowEqual=*/true);
815+
}
816+
817+
bool SourceManager::containsTokenLoc(SourceRange range, SourceLoc loc) const {
818+
return isAtOrBefore(range.Start, loc) && isAtOrBefore(loc, range.End);
819+
}

0 commit comments

Comments
 (0)