Skip to content

Commit 7d39908

Browse files
authored
Merge pull request swiftlang#36279 from ahoppen/pr/improve-rawsyntax-layout
[libSyntax] Improve data structure in RawSyntax
2 parents 7bb494c + ab7c51b commit 7d39908

File tree

8 files changed, with 71 additions (+71) and 78 deletions (-78).

include/swift/AST/ASTPrinter.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ namespace swift {
3737
class NominalTypeDecl;
3838
class ValueDecl;
3939
class SourceLoc;
40-
enum class tok;
40+
enum class tok : uint8_t;
4141
enum class AccessorKind;
4242

4343
/// Describes the context in which a name is being printed, which

include/swift/Parse/ParsedRawSyntaxRecorder.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,10 +32,10 @@ class SyntaxParseActions;
3232
class SyntaxParsingContext;
3333
class SourceLoc;
3434
class Token;
35-
enum class tok;
35+
enum class tok : uint8_t;
3636

3737
namespace syntax {
38-
enum class SyntaxKind;
38+
enum class SyntaxKind : uint16_t;
3939
}
4040

4141
class ParsedRawSyntaxRecorder final {

include/swift/Parse/Parser.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ namespace swift {
6060

6161
namespace syntax {
6262
class RawSyntax;
63-
enum class SyntaxKind;
63+
enum class SyntaxKind : uint16_t;
6464
}// end of syntax namespace
6565

6666
/// Different contexts in which BraceItemList are parsed.

include/swift/Parse/SyntaxParseActions.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,11 +27,11 @@ namespace swift {
2727
class ParsedTriviaPiece;
2828
class SourceFile;
2929
class SourceLoc;
30-
enum class tok;
30+
enum class tok : uint8_t;
3131

3232
namespace syntax {
3333
class SourceFileSyntax;
34-
enum class SyntaxKind;
34+
enum class SyntaxKind : uint16_t;
3535
}
3636

3737
typedef const void *OpaqueSyntaxNode;

include/swift/Parse/SyntaxParsingContext.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,12 +27,12 @@ class ParsedSyntax;
2727
class ParsedTokenSyntax;
2828
struct ParsedTrivia;
2929
class SourceFile;
30-
enum class tok;
30+
enum class tok : uint8_t;
3131
class Token;
3232
class DiagnosticEngine;
3333

3434
namespace syntax {
35-
enum class SyntaxKind;
35+
enum class SyntaxKind : uint16_t;
3636
}
3737

3838
enum class SyntaxContextKind {

include/swift/Syntax/RawSyntax.h

Lines changed: 61 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -136,7 +136,7 @@ template <typename CursorType> constexpr CursorIndex cursorIndex(CursorType C) {
136136
/// An indicator of whether a Syntax node was found or written in the source.
137137
///
138138
/// This is not an 'implicit' bit.
139-
enum class SourcePresence {
139+
enum class SourcePresence : uint8_t {
140140
/// The syntax was authored by a human and found, or was generated.
141141
Present,
142142

@@ -164,52 +164,54 @@ class RawSyntax final
164164
/// have a manually specified id
165165
static SyntaxNodeId NextFreeNodeId;
166166

167-
/// An ID of this node that is stable across incremental parses
168-
SyntaxNodeId NodeId;
169-
170167
/// The \c SyntaxArena in which this node was allocated.
171168
SyntaxArena *Arena;
172169

173-
union {
174-
struct {
175-
/// Number of bytes this node takes up spelled out in the source code.
176-
/// Always 0 if the node is missing.
177-
unsigned TextLength : 32;
178-
/// Whether this piece of syntax was actually present in the source.
179-
unsigned Presence : 1;
180-
unsigned IsToken : 1;
181-
} Common;
182-
enum { NumRawSyntaxBits = 32 + 1 + 1 };
183-
184-
// For "layout" nodes.
185-
struct {
186-
static_assert(NumRawSyntaxBits <= 64,
187-
"Only 64 bits reserved for standard syntax bits");
188-
uint64_t : bitmax(NumRawSyntaxBits, 64); // align to 32 bits
189-
/// Number of children this "layout" node has.
190-
unsigned NumChildren : 32;
191-
/// Total number of sub nodes, i.e. number of transitive children of this
192-
/// node. This does not include the node itself.
193-
unsigned TotalSubNodeCount : 32;
194-
/// The kind of syntax this node represents.
195-
unsigned Kind : bitmax(NumSyntaxKindBits, 8);
196-
} Layout;
197-
198-
// For "token" nodes.
199-
struct {
200-
static_assert(NumRawSyntaxBits <= 64,
201-
"Only 64 bits reserved for standard syntax bits");
202-
uint64_t : bitmax(NumRawSyntaxBits, 64); // align to 16 bits
203-
/// The kind of token this "token" node represents.
204-
const char *LeadingTrivia;
205-
const char *TokenText;
206-
const char *TrailingTrivia;
207-
unsigned LeadingTriviaLength : 32;
208-
unsigned TokenLength : 32;
209-
unsigned TrailingTriviaLength : 32;
210-
unsigned TokenKind : 16;
211-
} Token;
212-
} Bits;
170+
/// An ID of this node that is stable across incremental parses
171+
SyntaxNodeId NodeId;
172+
173+
/// Number of bytes this node takes up spelled out in the source code.
174+
/// Always 0 if the node is missing.
175+
uint32_t TextLength;
176+
177+
/// Whether this piece of syntax was actually present in the source.
178+
SourcePresence Presence;
179+
180+
/// Whether this node is a token or layout node. Determines if \c Bits should
181+
/// be interpreted as \c LayoutData or \c TokenData.
182+
bool IsToken;
183+
184+
struct LayoutData {
185+
/// Number of children this "layout" node has.
186+
uint32_t NumChildren;
187+
/// Total number of sub nodes, i.e. number of transitive children of this
188+
/// node. This does not include the node itself.
189+
uint32_t TotalSubNodeCount;
190+
/// The kind of syntax this node represents.
191+
SyntaxKind Kind;
192+
};
193+
194+
struct TokenData {
195+
/// The pointers to the leading/trailing trivia and token texts. If their
196+
/// lengths are greater than 0, these always reside in the node's \c Arena.
197+
const char *LeadingTrivia;
198+
const char *TokenText;
199+
const char *TrailingTrivia;
200+
uint32_t LeadingTriviaLength;
201+
uint32_t TokenLength;
202+
uint32_t TrailingTriviaLength;
203+
/// The kind of token this "token" node represents.
204+
tok TokenKind;
205+
};
206+
207+
union BitsData {
208+
LayoutData Layout;
209+
TokenData Token;
210+
211+
BitsData(const LayoutData &Layout) : Layout(Layout) {}
212+
BitsData(const TokenData &Token) : Token(Token) {}
213+
};
214+
BitsData Bits;
213215

214216
size_t numTrailingObjects(OverloadToken<const RawSyntax *>) const {
215217
return isToken() ? 0 : Bits.Layout.NumChildren;
@@ -224,17 +226,19 @@ class RawSyntax final
224226
RawSyntax(SyntaxKind Kind, ArrayRef<const RawSyntax *> Layout,
225227
size_t TextLength, SourcePresence Presence,
226228
const RC<SyntaxArena> &Arena, llvm::Optional<SyntaxNodeId> NodeId)
227-
: Arena(Arena.get()),
228-
Bits({{unsigned(TextLength), unsigned(Presence), false}}) {
229+
: Arena(Arena.get()), TextLength(uint32_t(TextLength)),
230+
Presence(Presence), IsToken(false),
231+
Bits(LayoutData{uint32_t(Layout.size()),
232+
/*TotalSubNodeCount=*/0, /*set in body*/
233+
Kind}) {
229234
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
230235
assert(
231236
Kind != SyntaxKind::Token &&
232237
"'token' syntax node must be constructed with dedicated constructor");
233238

234-
size_t TotalSubNodeCount = 0;
235239
for (auto Child : Layout) {
236240
if (Child) {
237-
TotalSubNodeCount += Child->getTotalSubNodeCount() + 1;
241+
Bits.Layout.TotalSubNodeCount += Child->getTotalSubNodeCount() + 1;
238242
// If the child is stored in a different arena, it needs to stay alive
239243
// as long as this node's arena is alive.
240244
Arena->addChildArena(Child->Arena);
@@ -247,9 +251,6 @@ class RawSyntax final
247251
} else {
248252
this->NodeId = NextFreeNodeId++;
249253
}
250-
Bits.Layout.NumChildren = Layout.size();
251-
Bits.Layout.TotalSubNodeCount = TotalSubNodeCount;
252-
Bits.Layout.Kind = unsigned(Kind);
253254

254255
// Initialize layout data.
255256
std::uninitialized_copy(Layout.begin(), Layout.end(),
@@ -265,8 +266,11 @@ class RawSyntax final
265266
StringRef LeadingTrivia, StringRef TrailingTrivia,
266267
SourcePresence Presence, const RC<SyntaxArena> &Arena,
267268
llvm::Optional<SyntaxNodeId> NodeId)
268-
: Arena(Arena.get()),
269-
Bits({{unsigned(TextLength), unsigned(Presence), true}}) {
269+
: Arena(Arena.get()), TextLength(uint32_t(TextLength)),
270+
Presence(Presence), IsToken(true),
271+
Bits(TokenData{LeadingTrivia.data(), Text.data(), TrailingTrivia.data(),
272+
uint32_t(LeadingTrivia.size()), uint32_t(Text.size()),
273+
uint32_t(TrailingTrivia.size()), TokKind}) {
270274
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
271275

272276
if (Presence == SourcePresence::Missing) {
@@ -282,14 +286,6 @@ class RawSyntax final
282286
} else {
283287
this->NodeId = NextFreeNodeId++;
284288
}
285-
Bits.Token.LeadingTrivia = LeadingTrivia.data();
286-
Bits.Token.TokenText = Text.data();
287-
Bits.Token.TrailingTrivia = TrailingTrivia.data();
288-
Bits.Token.LeadingTriviaLength = LeadingTrivia.size();
289-
Bits.Token.TokenLength = Text.size();
290-
Bits.Token.TrailingTriviaLength = TrailingTrivia.size();
291-
Bits.Token.TokenKind = unsigned(TokKind);
292-
293289
Arena->copyStringToArenaIfNecessary(Bits.Token.LeadingTrivia,
294290
Bits.Token.LeadingTriviaLength);
295291
Arena->copyStringToArenaIfNecessary(Bits.Token.TokenText,
@@ -388,11 +384,11 @@ class RawSyntax final
388384
RC<SyntaxArena> getArena() const { return RC<SyntaxArena>(Arena); }
389385

390386
SourcePresence getPresence() const {
391-
return static_cast<SourcePresence>(Bits.Common.Presence);
387+
return static_cast<SourcePresence>(Presence);
392388
}
393389

394390
SyntaxKind getKind() const {
395-
if (Bits.Common.IsToken) {
391+
if (isToken()) {
396392
return SyntaxKind::Token;
397393
} else {
398394
return static_cast<SyntaxKind>(Bits.Layout.Kind);
@@ -442,7 +438,7 @@ class RawSyntax final
442438
bool isUnknown() const { return isUnknownKind(getKind()); }
443439

444440
/// Return true if this raw syntax node is a token.
445-
bool isToken() const { return Bits.Common.IsToken; }
441+
bool isToken() const { return IsToken; }
446442

447443
/// \name Getter routines for SyntaxKind::Token.
448444
/// @{
@@ -532,7 +528,7 @@ class RawSyntax final
532528

533529
/// Return the number of bytes this node takes when spelled out in the source
534530
/// including trivia.
535-
size_t getTextLength() const { return Bits.Common.TextLength; }
531+
size_t getTextLength() const { return TextLength; }
536532

537533
/// @}
538534

include/swift/Syntax/SyntaxKind.h.gyb

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
namespace swift {
3232
namespace syntax {
3333

34-
enum class SyntaxKind {
34+
enum class SyntaxKind : uint16_t {
3535
Token,
3636
% for name, nodes in grouped_nodes.items():
3737
% for node in nodes:
@@ -48,9 +48,6 @@ enum class SyntaxKind {
4848
// NOTE: Unknown must be the last kind.
4949
Unknown,
5050
};
51-
enum : unsigned {
52-
NumSyntaxKindBits = countBitsUsed(static_cast<unsigned>(SyntaxKind::Unknown))
53-
};
5451

5552
void dumpSyntaxKind(llvm::raw_ostream &os, const SyntaxKind kind);
5653

include/swift/Syntax/TokenKinds.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
#include "llvm/Support/raw_ostream.h"
2222

2323
namespace swift {
24-
enum class tok {
24+
enum class tok : uint8_t {
2525
#define TOKEN(X) X,
2626
#include "swift/Syntax/TokenKinds.def"
2727

0 commit comments

Comments
 (0)