Skip to content

Commit ab7c51b

Browse files
committed
[libSyntax] Improve data structure in RawSyntax
It turns out that the bit-packed Common struct is actually fairly expensive because the CPU needs to apply bitmasks to fetch the IsToken and Presence flags. We've got padding space available, so we might as well properly align these boolean flags. Also, on a source level, replace a couple of bit-restricted unsigned fields with the types they represent (e.g. SyntaxKind). Furthermore, we can pull out the common bits to RawSyntax and have the Bits union only contain the token- or layout-specific fields. This also allows us to initialise these fields in the constructor's initialiser list (instead of in the constructor body). Lastly, change copyStringToArenaIfNecessary to work on a char *& and a length, which allows us to initialise the leading/trailing trivia and token text in the initialiser list and adjust them later if necessary.
1 parent beadd4a commit ab7c51b

File tree

8 files changed

+71
-78
lines changed

8 files changed

+71
-78
lines changed

include/swift/AST/ASTPrinter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ namespace swift {
3737
class NominalTypeDecl;
3838
class ValueDecl;
3939
class SourceLoc;
40-
enum class tok;
40+
enum class tok : uint8_t;
4141
enum class AccessorKind;
4242

4343
/// Describes the context in which a name is being printed, which

include/swift/Parse/ParsedRawSyntaxRecorder.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ class SyntaxParseActions;
3232
class SyntaxParsingContext;
3333
class SourceLoc;
3434
class Token;
35-
enum class tok;
35+
enum class tok : uint8_t;
3636

3737
namespace syntax {
38-
enum class SyntaxKind;
38+
enum class SyntaxKind : uint16_t;
3939
}
4040

4141
class ParsedRawSyntaxRecorder final {

include/swift/Parse/Parser.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ namespace swift {
6060

6161
namespace syntax {
6262
class RawSyntax;
63-
enum class SyntaxKind;
63+
enum class SyntaxKind : uint16_t;
6464
}// end of syntax namespace
6565

6666
/// Different contexts in which BraceItemList are parsed.

include/swift/Parse/SyntaxParseActions.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ namespace swift {
2727
class ParsedTriviaPiece;
2828
class SourceFile;
2929
class SourceLoc;
30-
enum class tok;
30+
enum class tok : uint8_t;
3131

3232
namespace syntax {
3333
class SourceFileSyntax;
34-
enum class SyntaxKind;
34+
enum class SyntaxKind : uint16_t;
3535
}
3636

3737
typedef const void *OpaqueSyntaxNode;

include/swift/Parse/SyntaxParsingContext.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,12 @@ class ParsedSyntax;
2727
class ParsedTokenSyntax;
2828
struct ParsedTrivia;
2929
class SourceFile;
30-
enum class tok;
30+
enum class tok : uint8_t;
3131
class Token;
3232
class DiagnosticEngine;
3333

3434
namespace syntax {
35-
enum class SyntaxKind;
35+
enum class SyntaxKind : uint16_t;
3636
}
3737

3838
enum class SyntaxContextKind {

include/swift/Syntax/RawSyntax.h

Lines changed: 61 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ template <typename CursorType> constexpr CursorIndex cursorIndex(CursorType C) {
136136
/// An indicator of whether a Syntax node was found or written in the source.
137137
///
138138
/// This is not an 'implicit' bit.
139-
enum class SourcePresence {
139+
enum class SourcePresence : uint8_t {
140140
/// The syntax was authored by a human and found, or was generated.
141141
Present,
142142

@@ -164,52 +164,54 @@ class RawSyntax final
164164
/// have a manually specified id
165165
static SyntaxNodeId NextFreeNodeId;
166166

167-
/// An ID of this node that is stable across incremental parses
168-
SyntaxNodeId NodeId;
169-
170167
/// The \c SyntaxArena in which this node was allocated.
171168
SyntaxArena *Arena;
172169

173-
union {
174-
struct {
175-
/// Number of bytes this node takes up spelled out in the source code.
176-
/// Always 0 if the node is missing.
177-
unsigned TextLength : 32;
178-
/// Whether this piece of syntax was actually present in the source.
179-
unsigned Presence : 1;
180-
unsigned IsToken : 1;
181-
} Common;
182-
enum { NumRawSyntaxBits = 32 + 1 + 1 };
183-
184-
// For "layout" nodes.
185-
struct {
186-
static_assert(NumRawSyntaxBits <= 64,
187-
"Only 64 bits reserved for standard syntax bits");
188-
uint64_t : bitmax(NumRawSyntaxBits, 64); // align to 32 bits
189-
/// Number of children this "layout" node has.
190-
unsigned NumChildren : 32;
191-
/// Total number of sub nodes, i.e. number of transitive children of this
192-
/// node. This does not include the node itself.
193-
unsigned TotalSubNodeCount : 32;
194-
/// The kind of syntax this node represents.
195-
unsigned Kind : bitmax(NumSyntaxKindBits, 8);
196-
} Layout;
197-
198-
// For "token" nodes.
199-
struct {
200-
static_assert(NumRawSyntaxBits <= 64,
201-
"Only 64 bits reserved for standard syntax bits");
202-
uint64_t : bitmax(NumRawSyntaxBits, 64); // align to 16 bits
203-
/// The kind of token this "token" node represents.
204-
const char *LeadingTrivia;
205-
const char *TokenText;
206-
const char *TrailingTrivia;
207-
unsigned LeadingTriviaLength : 32;
208-
unsigned TokenLength : 32;
209-
unsigned TrailingTriviaLength : 32;
210-
unsigned TokenKind : 16;
211-
} Token;
212-
} Bits;
170+
/// An ID of this node that is stable across incremental parses
171+
SyntaxNodeId NodeId;
172+
173+
/// Number of bytes this node takes up spelled out in the source code.
174+
/// Always 0 if the node is missing.
175+
uint32_t TextLength;
176+
177+
/// Whether this piece of syntax was actually present in the source.
178+
SourcePresence Presence;
179+
180+
/// Whether this node is a token or layout node. Determines if \c Bits should
181+
/// be interpreted as \c LayoutData or \c TokenData.
182+
bool IsToken;
183+
184+
struct LayoutData {
185+
/// Number of children this "layout" node has.
186+
uint32_t NumChildren;
187+
/// Total number of sub nodes, i.e. number of transitive children of this
188+
/// node. This does not include the node itself.
189+
uint32_t TotalSubNodeCount;
190+
/// The kind of syntax this node represents.
191+
SyntaxKind Kind;
192+
};
193+
194+
struct TokenData {
195+
/// The pointers to the leading/trailing trivia and token texts. If their
196+
/// lengths are greater than 0, these always reside in the node's \c Arena.
197+
const char *LeadingTrivia;
198+
const char *TokenText;
199+
const char *TrailingTrivia;
200+
uint32_t LeadingTriviaLength;
201+
uint32_t TokenLength;
202+
uint32_t TrailingTriviaLength;
203+
/// The kind of token this "token" node represents.
204+
tok TokenKind;
205+
};
206+
207+
union BitsData {
208+
LayoutData Layout;
209+
TokenData Token;
210+
211+
BitsData(const LayoutData &Layout) : Layout(Layout) {}
212+
BitsData(const TokenData &Token) : Token(Token) {}
213+
};
214+
BitsData Bits;
213215

214216
size_t numTrailingObjects(OverloadToken<const RawSyntax *>) const {
215217
return isToken() ? 0 : Bits.Layout.NumChildren;
@@ -224,17 +226,19 @@ class RawSyntax final
224226
RawSyntax(SyntaxKind Kind, ArrayRef<const RawSyntax *> Layout,
225227
size_t TextLength, SourcePresence Presence,
226228
const RC<SyntaxArena> &Arena, llvm::Optional<SyntaxNodeId> NodeId)
227-
: Arena(Arena.get()),
228-
Bits({{unsigned(TextLength), unsigned(Presence), false}}) {
229+
: Arena(Arena.get()), TextLength(uint32_t(TextLength)),
230+
Presence(Presence), IsToken(false),
231+
Bits(LayoutData{uint32_t(Layout.size()),
232+
/*TotalSubNodeCount=*/0, /*set in body*/
233+
Kind}) {
229234
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
230235
assert(
231236
Kind != SyntaxKind::Token &&
232237
"'token' syntax node must be constructed with dedicated constructor");
233238

234-
size_t TotalSubNodeCount = 0;
235239
for (auto Child : Layout) {
236240
if (Child) {
237-
TotalSubNodeCount += Child->getTotalSubNodeCount() + 1;
241+
Bits.Layout.TotalSubNodeCount += Child->getTotalSubNodeCount() + 1;
238242
// If the child is stored in a different arena, it needs to stay alive
239243
// as long as this node's arena is alive.
240244
Arena->addChildArena(Child->Arena);
@@ -247,9 +251,6 @@ class RawSyntax final
247251
} else {
248252
this->NodeId = NextFreeNodeId++;
249253
}
250-
Bits.Layout.NumChildren = Layout.size();
251-
Bits.Layout.TotalSubNodeCount = TotalSubNodeCount;
252-
Bits.Layout.Kind = unsigned(Kind);
253254

254255
// Initialize layout data.
255256
std::uninitialized_copy(Layout.begin(), Layout.end(),
@@ -265,8 +266,11 @@ class RawSyntax final
265266
StringRef LeadingTrivia, StringRef TrailingTrivia,
266267
SourcePresence Presence, const RC<SyntaxArena> &Arena,
267268
llvm::Optional<SyntaxNodeId> NodeId)
268-
: Arena(Arena.get()),
269-
Bits({{unsigned(TextLength), unsigned(Presence), true}}) {
269+
: Arena(Arena.get()), TextLength(uint32_t(TextLength)),
270+
Presence(Presence), IsToken(true),
271+
Bits(TokenData{LeadingTrivia.data(), Text.data(), TrailingTrivia.data(),
272+
uint32_t(LeadingTrivia.size()), uint32_t(Text.size()),
273+
uint32_t(TrailingTrivia.size()), TokKind}) {
270274
assert(Arena && "RawSyntax nodes must always be allocated in an arena");
271275

272276
if (Presence == SourcePresence::Missing) {
@@ -282,14 +286,6 @@ class RawSyntax final
282286
} else {
283287
this->NodeId = NextFreeNodeId++;
284288
}
285-
Bits.Token.LeadingTrivia = LeadingTrivia.data();
286-
Bits.Token.TokenText = Text.data();
287-
Bits.Token.TrailingTrivia = TrailingTrivia.data();
288-
Bits.Token.LeadingTriviaLength = LeadingTrivia.size();
289-
Bits.Token.TokenLength = Text.size();
290-
Bits.Token.TrailingTriviaLength = TrailingTrivia.size();
291-
Bits.Token.TokenKind = unsigned(TokKind);
292-
293289
Arena->copyStringToArenaIfNecessary(Bits.Token.LeadingTrivia,
294290
Bits.Token.LeadingTriviaLength);
295291
Arena->copyStringToArenaIfNecessary(Bits.Token.TokenText,
@@ -388,11 +384,11 @@ class RawSyntax final
388384
RC<SyntaxArena> getArena() const { return RC<SyntaxArena>(Arena); }
389385

390386
SourcePresence getPresence() const {
391-
return static_cast<SourcePresence>(Bits.Common.Presence);
387+
return static_cast<SourcePresence>(Presence);
392388
}
393389

394390
SyntaxKind getKind() const {
395-
if (Bits.Common.IsToken) {
391+
if (isToken()) {
396392
return SyntaxKind::Token;
397393
} else {
398394
return static_cast<SyntaxKind>(Bits.Layout.Kind);
@@ -442,7 +438,7 @@ class RawSyntax final
442438
bool isUnknown() const { return isUnknownKind(getKind()); }
443439

444440
/// Return true if this raw syntax node is a token.
445-
bool isToken() const { return Bits.Common.IsToken; }
441+
bool isToken() const { return IsToken; }
446442

447443
/// \name Getter routines for SyntaxKind::Token.
448444
/// @{
@@ -532,7 +528,7 @@ class RawSyntax final
532528

533529
/// Return the number of bytes this node takes when spelled out in the source
534530
/// including trivia.
535-
size_t getTextLength() const { return Bits.Common.TextLength; }
531+
size_t getTextLength() const { return TextLength; }
536532

537533
/// @}
538534

include/swift/Syntax/SyntaxKind.h.gyb

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
namespace swift {
3232
namespace syntax {
3333

34-
enum class SyntaxKind {
34+
enum class SyntaxKind : uint16_t {
3535
Token,
3636
% for name, nodes in grouped_nodes.items():
3737
% for node in nodes:
@@ -48,9 +48,6 @@ enum class SyntaxKind {
4848
// NOTE: Unknown must be the last kind.
4949
Unknown,
5050
};
51-
enum : unsigned {
52-
NumSyntaxKindBits = countBitsUsed(static_cast<unsigned>(SyntaxKind::Unknown))
53-
};
5451

5552
void dumpSyntaxKind(llvm::raw_ostream &os, const SyntaxKind kind);
5653

include/swift/Syntax/TokenKinds.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
#include "llvm/Support/raw_ostream.h"
2222

2323
namespace swift {
24-
enum class tok {
24+
enum class tok : uint8_t {
2525
#define TOKEN(X) X,
2626
#include "swift/Syntax/TokenKinds.def"
2727

0 commit comments

Comments
 (0)