Skip to content

Commit 6a43596

Browse files
committed
Requestify regex pattern parsing
Instead of parsing the regex pattern in both the C++ parser and ASTGen, factor the parsing out into a request that returns the pattern to emit, the regex type, and the version. The request can then be evaluated lazily during type-checking.
1 parent 21cbfc4 commit 6a43596

File tree

17 files changed

+211
-240
lines changed

17 files changed

+211
-240
lines changed

include/swift/AST/ASTBridging.h

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1405,17 +1405,11 @@ BridgedPrefixUnaryExpr
14051405
BridgedPrefixUnaryExpr_createParsed(BridgedASTContext cContext,
14061406
BridgedExpr oper, BridgedExpr operand);
14071407

1408-
SWIFT_NAME("BridgedRegexLiteralExpr."
1409-
"allocateCaptureStructureSerializationBuffer(_:size:)")
1410-
BridgedData BridgedRegexLiteralExpr_allocateCaptureStructureSerializationBuffer(
1411-
BridgedASTContext cContext, SwiftInt size);
1412-
1413-
SWIFT_NAME("BridgedRegexLiteralExpr.createParsed(_:loc:regexText:version:"
1414-
"captureStructure:)")
1415-
BridgedRegexLiteralExpr BridgedRegexLiteralExpr_createParsed(
1416-
BridgedASTContext cContext, BridgedSourceLoc cLoc,
1417-
BridgedStringRef cRegexText, SwiftInt version,
1418-
BridgedData cCaptureStructure);
1408+
SWIFT_NAME("BridgedRegexLiteralExpr.createParsed(_:loc:regexText:)")
1409+
BridgedRegexLiteralExpr
1410+
BridgedRegexLiteralExpr_createParsed(BridgedASTContext cContext,
1411+
BridgedSourceLoc cLoc,
1412+
BridgedStringRef cRegexText);
14191413

14201414
SWIFT_NAME("BridgedSequenceExpr.createParsed(_:exprs:)")
14211415
BridgedSequenceExpr BridgedSequenceExpr_createParsed(BridgedASTContext cContext,

include/swift/AST/Expr.h

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -993,39 +993,32 @@ class InterpolatedStringLiteralExpr : public LiteralExpr {
993993

994994
/// A regular expression literal e.g '(a|c)*'.
995995
class RegexLiteralExpr : public LiteralExpr {
996+
ASTContext *Ctx;
996997
SourceLoc Loc;
997-
StringRef RegexText;
998-
unsigned Version;
999-
ArrayRef<uint8_t> SerializedCaptureStructure;
998+
StringRef ParsedRegexText;
1000999

1001-
RegexLiteralExpr(SourceLoc loc, StringRef regexText, unsigned version,
1002-
ArrayRef<uint8_t> serializedCaps,
1000+
RegexLiteralExpr(ASTContext *ctx, SourceLoc loc, StringRef parsedRegexText,
10031001
bool isImplicit)
1004-
: LiteralExpr(ExprKind::RegexLiteral, isImplicit), Loc(loc),
1005-
RegexText(regexText), Version(version),
1006-
SerializedCaptureStructure(serializedCaps) {}
1002+
: LiteralExpr(ExprKind::RegexLiteral, isImplicit), Ctx(ctx), Loc(loc),
1003+
ParsedRegexText(parsedRegexText) {}
10071004

10081005
public:
1009-
static RegexLiteralExpr *createParsed(
1010-
ASTContext &ctx, SourceLoc loc, StringRef regexText, unsigned version,
1011-
ArrayRef<uint8_t> serializedCaptureStructure);
1006+
static RegexLiteralExpr *createParsed(ASTContext &ctx, SourceLoc loc,
1007+
StringRef regexText);
10121008

1013-
typedef uint16_t CaptureStructureSerializationVersion;
1009+
ASTContext &getASTContext() const { return *Ctx; }
10141010

1015-
static unsigned getCaptureStructureSerializationAllocationSize(
1016-
unsigned regexLength) {
1017-
return sizeof(CaptureStructureSerializationVersion) + regexLength + 1;
1018-
}
1011+
/// Retrieve the raw parsed regex text.
1012+
StringRef getParsedRegexText() const { return ParsedRegexText; }
10191013

1020-
/// Retrieve the raw regex text.
1021-
StringRef getRegexText() const { return RegexText; }
1014+
/// Retrieve the regex pattern to emit.
1015+
StringRef getRegexToEmit() const;
10221016

1023-
/// Retrieve the version of the regex string.
1024-
unsigned getVersion() const { return Version; }
1017+
/// Retrieve the computed type for the regex.
1018+
Type getRegexType() const;
10251019

1026-
ArrayRef<uint8_t> getSerializedCaptureStructure() {
1027-
return SerializedCaptureStructure;
1028-
}
1020+
/// Retrieve the version of the regex string.
1021+
unsigned getVersion() const;
10291022

10301023
SourceRange getSourceRange() const { return Loc; }
10311024

@@ -6545,9 +6538,8 @@ void simple_display(llvm::raw_ostream &out, const ClosureExpr *CE);
65456538
void simple_display(llvm::raw_ostream &out, const DefaultArgumentExpr *expr);
65466539
void simple_display(llvm::raw_ostream &out, const Expr *expr);
65476540

6548-
SourceLoc extractNearestSourceLoc(const DefaultArgumentExpr *expr);
6549-
SourceLoc extractNearestSourceLoc(const MacroExpansionExpr *expr);
65506541
SourceLoc extractNearestSourceLoc(const ClosureExpr *expr);
6542+
SourceLoc extractNearestSourceLoc(const Expr *expr);
65516543

65526544
} // end namespace swift
65536545

include/swift/AST/TypeCheckRequests.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5088,6 +5088,31 @@ class SuppressesConformanceRequest
50885088
bool isCached() const { return true; }
50895089
};
50905090

5091+
struct RegexLiteralPatternInfo {
5092+
StringRef RegexToEmit;
5093+
Type RegexType;
5094+
size_t Version;
5095+
};
5096+
5097+
/// Parses the regex pattern for a given regex literal using the
5098+
/// compiler's regex parsing library, and returns the resulting info.
5099+
class RegexLiteralPatternInfoRequest
5100+
: public SimpleRequest<RegexLiteralPatternInfoRequest,
5101+
RegexLiteralPatternInfo(const RegexLiteralExpr *),
5102+
RequestFlags::Cached> {
5103+
public:
5104+
using SimpleRequest::SimpleRequest;
5105+
5106+
private:
5107+
friend SimpleRequest;
5108+
5109+
RegexLiteralPatternInfo evaluate(Evaluator &evaluator,
5110+
const RegexLiteralExpr *regex) const;
5111+
5112+
public:
5113+
bool isCached() const { return true; }
5114+
};
5115+
50915116
class IsUnsafeRequest
50925117
: public SimpleRequest<IsUnsafeRequest,
50935118
bool(Decl *decl),

include/swift/AST/TypeCheckerTypeIDZone.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,10 @@ SWIFT_REQUEST(TypeChecker, SuppressesConformanceRequest,
589589
bool(NominalTypeDecl *decl, KnownProtocolKind kp),
590590
SeparatelyCached, NoLocationInfo)
591591

592+
SWIFT_REQUEST(TypeChecker, RegexLiteralPatternInfoRequest,
593+
RegexLiteralPatternInfo(const RegexLiteralExpr *),
594+
Cached, NoLocationInfo)
595+
592596
SWIFT_REQUEST(TypeChecker, CaptureInfoRequest,
593597
CaptureInfo(AbstractFunctionDecl *),
594598
SeparatelyCached, NoLocationInfo)

lib/AST/ASTDumper.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2280,8 +2280,8 @@ class PrintExpr : public ExprVisitor<PrintExpr, void, StringRef>,
22802280
}
22812281
void visitRegexLiteralExpr(RegexLiteralExpr *E, StringRef label) {
22822282
printCommon(E, "regex_literal_expr", label);
2283-
2284-
printFieldQuoted(E->getRegexText(), "text", LiteralValueColor);
2283+
2284+
printFieldQuoted(E->getParsedRegexText(), "text", LiteralValueColor);
22852285
printInitializerField(E->getInitializer(), "initializer");
22862286

22872287
printFoot();

lib/AST/ASTPrinter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4748,7 +4748,7 @@ void PrintAST::visitBooleanLiteralExpr(BooleanLiteralExpr *expr) {
47484748
}
47494749

47504750
void PrintAST::visitRegexLiteralExpr(RegexLiteralExpr *expr) {
4751-
Printer << expr->getRegexText();
4751+
Printer << expr->getParsedRegexText();
47524752
}
47534753

47544754
void PrintAST::visitErrorExpr(ErrorExpr *expr) {

lib/AST/Bridging/ExprBridging.cpp

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -330,25 +330,12 @@ BridgedPrefixUnaryExpr_createParsed(BridgedASTContext cContext,
330330
operand.unbridged());
331331
}
332332

333-
BridgedData BridgedRegexLiteralExpr_allocateCaptureStructureSerializationBuffer(
334-
BridgedASTContext cContext, SwiftInt size) {
335-
auto buf = cContext.unbridged().AllocateUninitialized<uint8_t>(
336-
RegexLiteralExpr::getCaptureStructureSerializationAllocationSize(
337-
unsigned(size)));
338-
return BridgedData(reinterpret_cast<const char *>(buf.data()), buf.size());
339-
}
340-
341-
BridgedRegexLiteralExpr BridgedRegexLiteralExpr_createParsed(
342-
BridgedASTContext cContext, BridgedSourceLoc cLoc,
343-
BridgedStringRef cRegexText, SwiftInt version,
344-
BridgedData cCaptureStructure) {
345-
ArrayRef<uint8_t> captures(
346-
reinterpret_cast<const uint8_t *>(cCaptureStructure.BaseAddress),
347-
cCaptureStructure.Length);
348-
333+
BridgedRegexLiteralExpr
334+
BridgedRegexLiteralExpr_createParsed(BridgedASTContext cContext,
335+
BridgedSourceLoc cLoc,
336+
BridgedStringRef cRegexText) {
349337
return RegexLiteralExpr::createParsed(cContext.unbridged(), cLoc.unbridged(),
350-
cRegexText.unbridged(),
351-
unsigned(version), captures);
338+
cRegexText.unbridged());
352339
}
353340

354341
BridgedSequenceExpr BridgedSequenceExpr_createParsed(BridgedASTContext cContext,

lib/AST/Expr.cpp

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2738,12 +2738,27 @@ SourceRange TapExpr::getSourceRange() const {
27382738
Body->getSourceRange());
27392739
}
27402740

2741-
RegexLiteralExpr *
2742-
RegexLiteralExpr::createParsed(ASTContext &ctx, SourceLoc loc,
2743-
StringRef regexText, unsigned version,
2744-
ArrayRef<uint8_t> serializedCaps) {
2745-
return new (ctx) RegexLiteralExpr(loc, regexText, version, serializedCaps,
2746-
/*implicit*/ false);
2741+
RegexLiteralExpr *RegexLiteralExpr::createParsed(ASTContext &ctx, SourceLoc loc,
2742+
StringRef regexText) {
2743+
return new (ctx) RegexLiteralExpr(&ctx, loc, regexText, /*implicit*/ false);
2744+
}
2745+
2746+
StringRef RegexLiteralExpr::getRegexToEmit() const {
2747+
auto &eval = getASTContext().evaluator;
2748+
return evaluateOrDefault(eval, RegexLiteralPatternInfoRequest{this}, {})
2749+
.RegexToEmit;
2750+
}
2751+
2752+
Type RegexLiteralExpr::getRegexType() const {
2753+
auto &eval = getASTContext().evaluator;
2754+
return evaluateOrDefault(eval, RegexLiteralPatternInfoRequest{this}, {})
2755+
.RegexType;
2756+
}
2757+
2758+
unsigned RegexLiteralExpr::getVersion() const {
2759+
auto &eval = getASTContext().evaluator;
2760+
return evaluateOrDefault(eval, RegexLiteralPatternInfoRequest{this}, {})
2761+
.Version;
27472762
}
27482763

27492764
TypeJoinExpr::TypeJoinExpr(llvm::PointerUnion<DeclRefExpr *, TypeBase *> result,
@@ -2849,11 +2864,7 @@ SourceLoc swift::extractNearestSourceLoc(const ClosureExpr *expr) {
28492864
return expr->getLoc();
28502865
}
28512866

2852-
SourceLoc swift::extractNearestSourceLoc(const DefaultArgumentExpr *expr) {
2853-
return expr->getLoc();
2854-
}
2855-
2856-
SourceLoc swift::extractNearestSourceLoc(const MacroExpansionExpr *expr) {
2867+
SourceLoc swift::extractNearestSourceLoc(const Expr *expr) {
28572868
return expr->getLoc();
28582869
}
28592870

lib/ASTGen/Sources/ASTGen/Exprs.swift

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ extension ASTGenVisitor {
155155
case .prefixOperatorExpr(let node):
156156
return self.generate(prefixOperatorExpr: node).asExpr
157157
case .regexLiteralExpr(let node):
158-
return self.generate(regexLiteralExpr: node)
158+
return self.generate(regexLiteralExpr: node).asExpr
159159
case .sequenceExpr(let node):
160160
return self.generate(sequenceExpr: node)
161161
case .simpleStringLiteralExpr:
@@ -559,6 +559,19 @@ extension ASTGenVisitor {
559559
)
560560
}
561561

562+
func generate(regexLiteralExpr node: RegexLiteralExprSyntax) -> BridgedRegexLiteralExpr {
563+
// Copy the regex string to the ASTContext.
564+
var str = node.trimmedDescription
565+
let regexText = str.withBridgedString {
566+
self.ctx.allocateCopy(string: $0)
567+
}
568+
return .createParsed(
569+
self.ctx,
570+
loc: self.generateSourceLoc(node),
571+
regexText: regexText
572+
)
573+
}
574+
562575
func generate(sequenceExpr node: SequenceExprSyntax) -> BridgedExpr {
563576
assert(
564577
!node.elements.count.isMultiple(of: 2),

lib/ASTGen/Sources/ASTGen/Regex.swift

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -18,56 +18,6 @@ import SwiftSyntax
1818
#if canImport(_CompilerRegexParser)
1919
@_spi(CompilerInterface) import _CompilerRegexParser
2020

21-
extension ASTGenVisitor {
22-
func generate(regexLiteralExpr node: RegexLiteralExprSyntax) -> BridgedExpr {
23-
let str = node.trimmedDescription
24-
let captureBuffer = BridgedRegexLiteralExpr
25-
.allocateCaptureStructureSerializationBuffer(self.ctx, size: str.utf8.count)
26-
let captureBufferOut = UnsafeMutableRawBufferPointer(
27-
start: UnsafeMutableRawPointer(mutating: captureBuffer.baseAddress),
28-
count: captureBuffer.count
29-
)
30-
31-
let loc = self.generateSourceLoc(node);
32-
33-
do {
34-
// FIXME: We need to plumb through the 'regexToEmit' result to the caller.
35-
// For now, it is the same as the input.
36-
var regexToEmit: String
37-
let version: Int
38-
(regexToEmit, version) = try swiftCompilerParseRegexLiteral(
39-
str,
40-
captureBufferOut: captureBufferOut
41-
)
42-
// Copy the regex string to the ASTContext.
43-
let regexToEmitStr = regexToEmit.withBridgedString {
44-
self.ctx.allocateCopy(string: $0)
45-
}
46-
47-
return BridgedRegexLiteralExpr.createParsed(
48-
self.ctx,
49-
loc: loc,
50-
regexText: regexToEmitStr,
51-
version: version,
52-
captureStructure: captureBuffer
53-
).asExpr
54-
} catch let error as _CompilerRegexParser.CompilerParseError {
55-
let offset = error.location != nil ? str.utf8.offset(of: error.location!) : 0
56-
let position = node.positionAfterSkippingLeadingTrivia.advanced(by: offset)
57-
self.diagnose(
58-
Diagnostic(
59-
node: node.regex,
60-
position: position,
61-
message: RegexParserError(error.message)
62-
)
63-
)
64-
return BridgedErrorExpr.create(self.ctx, loc: BridgedSourceRange(start: loc, end: loc)).asExpr
65-
} catch {
66-
fatalError("Expected CompilerParseError")
67-
}
68-
}
69-
}
70-
7121
/// Bridging between C++ lexer and swiftCompilerLexRegexLiteral.
7222
///
7323
/// Attempt to lex a regex literal string.

0 commit comments

Comments
 (0)