Skip to content

Commit e1e0f54

Browse files
authored
Merge pull request swiftlang#36350 from ahoppen/pr/syntaxref
[libSyntax] Create SyntaxRef, which uses SyntaxDataRef internally
2 parents f7410ff + 3fb5dd1 commit e1e0f54

File tree

6 files changed

+623
-51
lines changed

6 files changed

+623
-51
lines changed

include/swift/Syntax/Syntax.h

Lines changed: 336 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,29 @@
1111
//===----------------------------------------------------------------------===//
1212
//
1313
// This file defines the Syntax type, the main public-facing classes and
14-
// subclasses for dealing with Swift Syntax.
14+
// subclasses for dealing with Swift Syntax. It essentially wraps
15+
// SyntaxData(Ref) and provides convenience APIs (like retrieving children)
16+
// based on the syntax kind.
1517
//
16-
// Syntax types contain a strong reference to the root of the tree to keep
17-
// the subtree above alive, and a weak reference to the data representing
18-
// the syntax node (weak to prevent retain cycles). All significant public API
19-
// are contained in Syntax and its subclasses.
18+
// There are two versions of the Syntax type.
19+
// SyntaxRef:
20+
// SyntaxRef is designed around efficiency. It *does not* retain the
21+
// SyntaxDataRef that stores its data - the user must guarantee that the
22+
// SyntaxDataRef outlives the SyntaxRef that references it. Instead,
23+
// SyntaxRef provides a *view* into the SyntaxDataRef and the view provides
24+
// all convenience APIs. The advantage of this is that the underlying SyntaxData
25+
// can be stack-allocated and does not need to be copied when the SyntaxRef
26+
// is being passed around or when the SyntaxRef is being cast.
2027
//
28+
// Syntax:
29+
// The syntax nodes are designed for memory safety. Syntax nodes always retain
30+
// (and ref-count) heap-allocated SyntaxData nodes. While this provides maximum
31+
// memory safety, the heap allocations and the ref-counting have a significant
32+
// performance overhead.
33+
//
34+
// Note that the two access modes can also be mixed. When a syntax tree is
35+
// accessed by Syntax (memory-safe) nodes, they can be demoted to SyntaxRef
36+
// nodes to perform performance-critical tasks.
2137
//===----------------------------------------------------------------------===//
2238

2339
#ifndef SWIFT_SYNTAX_SYNTAX_H
@@ -47,19 +63,137 @@ SyntaxNode makeRoot(const RawSyntax *Raw) {
4763

4864
const auto NoParent = llvm::None;
4965

50-
/// The main handle for syntax nodes - subclasses contain all public
51-
/// structured editing APIs.
66+
/// Marker type to construct \c SyntaxRef nodes without validation. This is used
67+
/// to create \c SyntaxRef inside \c OwnedSyntaxRef and \c
68+
/// OptionalOwnedSyntaxRef that point to a \c SyntaxDataRef which is yet to be
69+
/// initialised.
70+
/// Validation will occur in these types once the \c SyntaxRef is accessed.
71+
// Empty tag type: it carries no state and is only passed as an argument to
// select the constructor overload that does not validate.
struct no_validation_t {};
72+
73+
// MARK: - OwnedSyntaxRef
74+
75+
/// Holds a \c SyntaxDataRef and provides a \c SyntaxRef (or one of its
76+
/// subclasses) as an accessor to the \c SyntaxDataRef.
77+
/// The user of this type needs to make sure that the \c OwnedSyntaxRef always
78+
/// outlives the \c SyntaxRef provided by it, because otherwise the \c SyntaxRef
79+
/// points to invalid memory.
80+
/// It allows transparent access to the \c SyntaxRef through the \c -> operator.
81+
///
82+
/// All methods that return a \c OwnedSyntaxRef should be inlined to avoid
83+
/// copying the \c SyntaxDataRef, which is rather expensive because the struct
84+
/// is rather large.
5285
///
53-
/// Essentially, this is a wrapper around \c SyntaxData that provides
54-
/// convenience methods based on the node's kind.
86+
/// A typical initialisation of a OwnedSyntaxRef looks as follows:
87+
/// \code
88+
/// OwnedSyntaxRef<MySyntaxRef> Result;
89+
/// someSyntaxDataRef.getChildRef(Index, Result.getDataPtr());
90+
/// \endcode
91+
/// The first line creates an empty \c OwnedSyntaxRef with uninitialised memory.
92+
/// The second line invokes a method that fills \c Data of \c OwnedSyntaxRef.
93+
/// This way, we directly write the \c SyntaxDataRef to the correct memory
94+
/// location and avoid copying it around.
95+
template <typename SyntaxRefType>
96+
class OwnedSyntaxRef {
97+
SyntaxDataRef Data;
98+
SyntaxRefType Ref;
99+
100+
public:
101+
/// Create an *uninintialized* \c OwnedSyntaxRef. Its storage needs to be
102+
/// initialised by writing a \c SyntaxDataRef to the pointer returned by
103+
/// \c getDataPtr()
104+
/// Implementation Note: We need to initialise \c Ref without validation,
105+
/// because \c Data is still uninitialised. \c Ref will be validated when
106+
/// accessed using \c getRef or \c -> .
107+
OwnedSyntaxRef() : Data(), Ref(getDataPtr(), no_validation_t()) {}
108+
109+
OwnedSyntaxRef(const OwnedSyntaxRef &Other)
110+
: Data(Other.Data), Ref(getDataPtr(), no_validation_t()) {}
111+
OwnedSyntaxRef(OwnedSyntaxRef &&Other)
112+
: Data(std::move(Other.Data)), Ref(getDataPtr(), no_validation_t()) {}
113+
114+
/// The pointer to the location at which \c this stores the \c Data.
115+
/// Can be used to retroactively populate the \c Data after \c OwnedSyntaxRef
116+
/// has been constructed with uninitialised memory.
117+
SyntaxDataRef *getDataPtr() { return &Data; }
118+
119+
const SyntaxRefType &getRef() {
120+
assert(Ref.getDataRef() == getDataPtr() &&
121+
"Ref no longer pointing to Data?");
122+
#ifndef NDEBUG
123+
// This might be the first access to Ref after Data has been modified.
124+
// Validate the node.
125+
Ref.validate();
126+
#endif
127+
return Ref;
128+
}
129+
130+
const SyntaxRefType *operator->() {
131+
assert(Ref.getDataRef() == getDataPtr() &&
132+
"Ref no longer pointing to Data?");
133+
#ifndef NDEBUG
134+
// This might be the first access to Ref after Data has been modified.
135+
// Validate the node.
136+
Ref.validate();
137+
#endif
138+
return &Ref;
139+
}
140+
};
141+
142+
/// Same as \c OwnedSyntaxRef but can be null. We don't use \c
143+
/// Optional<OwnedSyntaxRef<SyntaxRefType>>, because then we couldn't access
144+
/// the underlying \c SyntaxRefType via the \c -> operator (the use of \c ->
145+
/// would access the \c OwnedSyntaxRef<SyntaxRefType> wrapped by \c Optional and
146+
/// not the \c SyntaxRefType wrapped by \c OwnedSyntaxRef).
147+
template <typename SyntaxRefType>
148+
class OptionalOwnedSyntaxRef {
149+
Optional<SyntaxDataRef> Data;
150+
SyntaxRefType Ref;
151+
152+
public:
153+
OptionalOwnedSyntaxRef() : Data(), Ref(getDataPtr(), no_validation_t()) {}
154+
155+
OptionalOwnedSyntaxRef(const OptionalOwnedSyntaxRef &Other)
156+
: Data(Other.Data), Ref(getDataPtr(), no_validation_t()) {}
157+
OptionalOwnedSyntaxRef(OptionalOwnedSyntaxRef &&Other)
158+
: Data(std::move(Other.Data)), Ref(getDataPtr(), no_validation_t()) {}
159+
160+
SyntaxDataRef *getDataPtr() { return Data.getPointer(); }
161+
162+
bool hasValue() const { return Data.hasValue(); }
163+
164+
explicit operator bool() const { return hasValue(); }
165+
166+
const SyntaxRefType &getRef() {
167+
assert(Ref.getDataRef() == getDataPtr() &&
168+
"Ref no longer pointing to Data?");
169+
assert(hasValue() && "Accessing a OptionalOwnedSyntaxRef without a value");
170+
#ifndef NDEBUG
171+
// This might be the first access to Ref after Data has been populated.
172+
// Validate the node.
173+
Ref.validate();
174+
#endif
175+
return Ref;
176+
}
177+
178+
SyntaxRefType *operator->() {
179+
assert(Ref.getDataRef() == getDataPtr() &&
180+
"Ref no longer pointing to Data?");
181+
assert(hasValue() && "OptionalOwnedSyntaxRef doesn't have a value");
182+
return &Ref;
183+
}
184+
};
185+
186+
// MARK: - Syntax
187+
188+
/// See comment on top of file.
55189
class Syntax {
56190
protected:
57-
RC<const SyntaxData> Data;
191+
const RC<const SyntaxData> Data;
58192

59193
public:
60-
explicit Syntax(const RC<const SyntaxData> &Data) : Data(Data) {}
61-
62-
virtual ~Syntax() {}
194+
explicit Syntax(const RC<const SyntaxData> &Data) : Data(Data) {
195+
assert(Data != nullptr && "Syntax must be backed by non-null Data");
196+
}
63197

64198
/// Get the kind of syntax.
65199
SyntaxKind getKind() const;
@@ -194,8 +328,196 @@ class Syntax {
194328
AbsoluteOffsetPosition getAbsoluteEndPositionAfterTrailingTrivia() const {
195329
return Data->getAbsoluteEndPositionAfterTrailingTrivia();
196330
}
331+
};
332+
333+
// MARK: - SyntaxRef
334+
335+
/// See comment on top of file.
336+
class SyntaxRef {
337+
const SyntaxDataRef * const Data;
338+
339+
public:
340+
/// Create a \c SyntaxRef and validate that the \p Data can actually represent
341+
/// a \c SyntaxRef. Validation in particular performs checks for derived
342+
/// types.
343+
explicit SyntaxRef(const SyntaxDataRef *Data) : Data(Data) {
344+
assert(Data != nullptr && "SyntaxRef must reference Data");
345+
this->validate();
346+
}
347+
SyntaxRef(const SyntaxDataRef *Data, no_validation_t) : Data(Data) {
348+
assert(Data != nullptr && "SyntaxRef must reference Data");
349+
}
350+
351+
/// Demote a \c Syntax to a \c SyntaxRef
352+
SyntaxRef(const Syntax &Node) : SyntaxRef(Node.getData().get()) {}
353+
354+
void validate() {}
355+
356+
// MARK: - Get underlying data
357+
358+
/// Get the \c SyntaxDataRef that stores the data of this \c SyntaxRef node.
359+
const SyntaxDataRef *getDataRef() const {
360+
return Data;
361+
}
362+
363+
const AbsoluteRawSyntax &getAbsoluteRaw() const {
364+
return getDataRef()->getAbsoluteRaw();
365+
}
366+
367+
/// Get the shared raw syntax.
368+
const RawSyntax *getRaw() const { return getDataRef()->getRaw(); }
369+
370+
/// Get the kind of syntax.
371+
SyntaxKind getKind() const { return getRaw()->getKind(); }
372+
373+
/// Get an ID for the \c RawSyntax node backing this \c Syntax which is
374+
/// stable across incremental parses.
375+
/// Note that this is different from the \c AbsoluteRawSyntax's \c NodeId,
376+
/// which uniquely identifies this node in the tree, but is not stable across
377+
/// incremental parses.
378+
SyntaxNodeId getId() const { return getRaw()->getId(); }
379+
380+
/// Return the number of bytes this node takes when spelled out in the source,
381+
/// including trivia.
382+
size_t getTextLength() const { return getRaw()->getTextLength(); }
383+
384+
// MARK: Parents/children
385+
386+
/// Return the parent of this node, if it has one, otherwise return \c None.
387+
llvm::Optional<SyntaxRef> getParentRef() const {
388+
if (auto ParentDataRef = getDataRef()->getParentRef()) {
389+
return SyntaxRef(ParentDataRef);
390+
} else {
391+
return None;
392+
}
393+
}
394+
395+
/// Get the number of child nodes in this piece of syntax.
396+
size_t getNumChildren() const { return getDataRef()->getNumChildren(); }
397+
398+
/// Returns the child index of this node in its parent, if it has one,
399+
/// otherwise 0.
400+
CursorIndex getIndexInParent() const {
401+
return getDataRef()->getIndexInParent();
402+
}
403+
404+
/// Get the \p N -th child of this piece of syntax.
405+
OptionalOwnedSyntaxRef<SyntaxRef> getChildRef(const size_t N) const {
406+
OptionalOwnedSyntaxRef<SyntaxRef> Result;
407+
getDataRef()->getChildRef(N, Result.getDataPtr());
408+
return Result;
409+
}
410+
411+
// MARK: Position
412+
413+
/// Get the offset at which the leading trivia of this node starts.
414+
AbsoluteOffsetPosition getAbsolutePositionBeforeLeadingTrivia() const {
415+
return getDataRef()->getAbsolutePositionBeforeLeadingTrivia();
416+
}
417+
418+
/// Get the offset at which the actual content (i.e. non-triva) of this node
419+
/// starts.
420+
AbsoluteOffsetPosition getAbsolutePositionAfterLeadingTrivia() const {
421+
return getDataRef()->getAbsolutePositionAfterLeadingTrivia();
422+
}
197423

198-
// TODO: hasSameStructureAs ?
424+
/// Get the offset at which the trailing trivia of this node starts.
425+
AbsoluteOffsetPosition getAbsoluteEndPositionBeforeTrailingTrivia() const {
426+
return getDataRef()->getAbsoluteEndPositionBeforeTrailingTrivia();
427+
}
428+
429+
/// Get the offset at which the trailing trivia of this node ends.
430+
AbsoluteOffsetPosition getAbsoluteEndPositionAfterTrailingTrivia() const {
431+
return getDataRef()->getAbsoluteEndPositionAfterTrailingTrivia();
432+
}
433+
434+
// MARK: - Get node kind
435+
436+
/// Returns true if this syntax node represents a token.
437+
bool isToken() const { return getRaw()->isToken(); }
438+
439+
/// Returns true if this syntax node represents a statement.
440+
bool isStmt() const { return getRaw()->isStmt(); }
441+
442+
/// Returns true if this syntax node represents a declaration.
443+
bool isDecl() const { return getRaw()->isDecl(); }
444+
445+
/// Returns true if this syntax node represents an expression.
446+
bool isExpr() const { return getRaw()->isExpr(); }
447+
448+
/// Returns true if this syntax node represents a pattern.
449+
bool isPattern() const { return getRaw()->isPattern(); }
450+
451+
/// Returns true if this syntax node represents a type.
452+
bool isType() const { return getRaw()->isType(); }
453+
454+
/// Returns true if this syntax is of some "unknown" kind.
455+
bool isUnknown() const { return getRaw()->isUnknown(); }
456+
457+
/// Returns true if the node is "missing" in the source (i.e. it was
458+
/// expected (or optional) but not written.
459+
bool isMissing() const { return getRaw()->isMissing(); }
460+
461+
/// Returns true if the node is "present" in the source.
462+
bool isPresent() const { return getRaw()->isPresent(); }
463+
464+
// MARK: Casting
465+
466+
/// Returns true if the syntax node is of the given type \p T.
467+
template <typename T>
468+
bool is() const {
469+
return T::classof(this);
470+
}
471+
472+
/// Cast this Syntax node to a more specific type, asserting it's of the
473+
/// right kind \p T.
474+
template <typename T>
475+
T castTo() const {
476+
assert(is<T>() && "castTo<T>() node of incompatible type!");
477+
return T(getDataRef());
478+
}
479+
480+
/// If this Syntax node is of the right kind \p T, cast and return it,
481+
/// otherwise return None.
482+
template <typename T>
483+
llvm::Optional<T> getAs() const {
484+
if (is<T>()) {
485+
return castTo<T>();
486+
} else {
487+
return None;
488+
}
489+
}
490+
491+
static bool kindof(SyntaxKind Kind) { return true; }
492+
493+
static bool classof(const SyntaxRef *S) {
494+
// Trivially true.
495+
return true;
496+
}
497+
498+
// MARK: - Miscellaneous
499+
500+
/// Print the syntax node with full fidelity to the given output stream.
501+
void print(llvm::raw_ostream &OS,
502+
SyntaxPrintOptions Opts = SyntaxPrintOptions()) const {
503+
if (auto Raw = getRaw()) {
504+
Raw->print(OS, Opts);
505+
}
506+
}
507+
508+
/// Print a debug representation of the syntax node to the given output stream
509+
/// and indentation level.
510+
void dump(llvm::raw_ostream &OS, unsigned Indent = 0) const {
511+
getRaw()->dump(OS, Indent);
512+
}
513+
514+
/// Print a debug representation of the syntax node to standard error.
515+
SWIFT_DEBUG_DUMP { getRaw()->dump(); }
516+
517+
bool hasSameIdentityAs(const SyntaxRef &Other) const {
518+
return getDataRef()->getAbsoluteRaw().getNodeId() ==
519+
Other.getDataRef()->getAbsoluteRaw().getNodeId();
520+
}
199521
};
200522

201523
} // end namespace syntax

0 commit comments

Comments
 (0)