Skip to content

Commit 37f1652

Browse files
committed
Prototype regex literal AST and emission
With `-enable-experimental-string-processing`, start lexing `'` delimiters as regex literals (this is just a placeholder delimiter for now). The contents of the literal get passed to the libswift library, which can return an error string to be emitted, or null for success. The libswift side isn't yet hooked up to the Swift regex parser, so for now just emit a dummy diagnostic for regexes starting with quantifiers. If successful, build an AST node which will be emitted as an implicit call to an `init(_regexString:)` initializer of an in-scope `Regex` decl (which will eventually be a known stdlib decl).
1 parent c0f7143 commit 37f1652

28 files changed

+304
-42
lines changed

include/swift/AST/DiagnosticsParse.def

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ ERROR(forbidden_interpolated_string,none,
9191
ERROR(forbidden_extended_escaping_string,none,
9292
"%0 cannot be an extended escaping string literal", (StringRef))
9393

94+
ERROR(regex_literal_parsing_error,none,
95+
"%0", (StringRef))
96+
9497
//------------------------------------------------------------------------------
9598
// MARK: Lexer diagnostics
9699
//------------------------------------------------------------------------------
@@ -108,9 +111,6 @@ ERROR(lex_unprintable_ascii_character,none,
108111
ERROR(lex_invalid_utf8,none,
109112
"invalid UTF-8 found in source file", ())
110113

111-
NOTE(lex_experimental_regex_strawperson,none,
112-
"'%0'", (StringRef))
113-
114114
ERROR(lex_single_quote_string,none,
115115
"single-quoted string literal found, use '\"'", ())
116116
ERROR(lex_invalid_curly_quote,none,
@@ -140,6 +140,9 @@ ERROR(lex_invalid_escape_delimiter,none,
140140
ERROR(lex_invalid_closing_delimiter,none,
141141
"too many '#' characters in closing delimiter", ())
142142

143+
ERROR(lex_unterminated_regex,none,
144+
"unterminated regex literal", ())
145+
143146
ERROR(lex_invalid_unicode_scalar,none,
144147
"invalid unicode scalar", ())
145148
ERROR(lex_unicode_escape_braces,none,

include/swift/AST/DiagnosticsSema.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3668,6 +3668,9 @@ ERROR(builtin_string_literal_broken_proto,none,
36683668
ERROR(string_literal_broken_proto,none,
36693669
"protocol 'ExpressibleByStringLiteral' is broken", ())
36703670

3671+
ERROR(regex_decl_broken,none,
3672+
"cannot find 'Regex' type in scope", ())
3673+
36713674
// Array literals
36723675
ERROR(should_use_dictionary_literal,none,
36733676
"dictionary of type %0 cannot be %select{used|initialized}1 "

include/swift/AST/Expr.h

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -961,7 +961,38 @@ class InterpolatedStringLiteralExpr : public LiteralExpr {
961961
return E->getKind() == ExprKind::InterpolatedStringLiteral;
962962
}
963963
};
964-
964+
965+
/// A regular expression literal, e.g. '(a|c)*'.
///
/// Stores the literal's source location, its raw text, and a separate
/// "semantic" expression that the literal is type-checked and emitted as.
966+
class RegexLiteralExpr : public LiteralExpr {
967+
/// Location of the literal; getSourceRange() is built from this alone.
SourceLoc Loc;
968+
/// The raw regex text, exposed through getRegexText().
StringRef RegexText;
969+
/// The expression the literal is type-checked and emitted as; replaceable
/// through setSemanticExpr().
Expr *SemanticExpr;
970+
971+
/// Private constructor: forwards \p isImplicit to LiteralExpr with the
/// RegexLiteral kind and stores the remaining arguments unchanged.
RegexLiteralExpr(SourceLoc loc, StringRef regexText, Expr *semanticExpr,
972+
bool isImplicit)
973+
: LiteralExpr(ExprKind::RegexLiteral, isImplicit), Loc(loc),
974+
RegexText(regexText), SemanticExpr(semanticExpr) {}
975+
976+
public:
/// Factory for a regex literal node; only declared here (the definition is
/// not part of this header hunk).
/// NOTE(review): presumably allocates the node in \p ctx and marks it as
/// non-implicit -- confirm against the definition.
977+
static RegexLiteralExpr *createParsed(ASTContext &ctx, SourceLoc loc,
978+
StringRef regexText,
979+
Expr *semanticExpr);
980+
981+
/// Retrieve the raw regex text.
982+
StringRef getRegexText() const { return RegexText; }
983+
984+
/// Retrieve the semantic expression that the regex will be type-checked and
985+
/// emitted as.
986+
Expr *getSemanticExpr() const { return SemanticExpr; }
987+
/// Replace the semantic expression with \p expr.
void setSemanticExpr(Expr *expr) { SemanticExpr = expr; }
988+
989+
/// The source range of the literal, formed from its single stored location.
SourceRange getSourceRange() const { return Loc; }
990+
991+
/// Returns true when \p E has the RegexLiteral expression kind.
static bool classof(const Expr *E) {
992+
return E->getKind() == ExprKind::RegexLiteral;
993+
}
994+
};
995+
965996
/// MagicIdentifierLiteralExpr - A magic identifier like #file which expands
966997
/// out to a literal at SILGen time.
967998
class MagicIdentifierLiteralExpr : public BuiltinLiteralExpr {

include/swift/AST/ExprNodes.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ ABSTRACT_EXPR(Literal, Expr)
7878
LITERAL_EXPR(MagicIdentifierLiteral, BuiltinLiteralExpr)
7979
EXPR_RANGE(BuiltinLiteral, BooleanLiteral, MagicIdentifierLiteral)
8080
LITERAL_EXPR(InterpolatedStringLiteral, LiteralExpr)
81+
LITERAL_EXPR(RegexLiteral, LiteralExpr)
8182
LITERAL_EXPR(ObjectLiteral, LiteralExpr)
8283
EXPR_RANGE(Literal, NilLiteral, ObjectLiteral)
8384
EXPR(DiscardAssignment, Expr)

include/swift/AST/KnownIdentifiers.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,10 @@ IDENTIFIER(pullback)
250250
IDENTIFIER(TangentVector)
251251
IDENTIFIER(zero)
252252

253+
// Regex literals
254+
IDENTIFIER(Regex)
255+
IDENTIFIER(_regexString)
256+
253257
// Distributed actors
254258
IDENTIFIER(transport)
255259
IDENTIFIER(using)

include/swift/Parse/ExperimentalRegexBridging.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ extern "C" {
88
typedef const char *(* ParseRegexStrawperson)(const char *);
99

1010
void Parser_registerParseRegexStrawperson(ParseRegexStrawperson fn);
11+
bool Parser_hasParseRegexStrawperson();
1112

1213
#ifdef __cplusplus
1314
} // extern "C"

include/swift/Parse/Lexer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,8 @@ class Lexer {
595595
void lexStringLiteral(unsigned CustomDelimiterLen = 0);
596596
void lexEscapedIdentifier();
597597

598+
void lexRegexLiteral(const char *TokStart);
599+
598600
void tryLexEditorPlaceholder();
599601
const char *findEndOfCurlyQuoteStringLiteral(const char *,
600602
bool EmitDiagnostics);

include/swift/Parse/Parser.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1579,6 +1579,7 @@ class Parser {
15791579
ParserResult<Expr> parseExprSelector();
15801580
ParserResult<Expr> parseExprSuper();
15811581
ParserResult<Expr> parseExprStringLiteral();
1582+
ParserResult<Expr> parseExprRegexLiteral();
15821583

15831584
StringRef copyAndStripUnderscores(StringRef text);
15841585

lib/AST/ASTDumper.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1907,6 +1907,11 @@ class PrintExpr : public ExprVisitor<PrintExpr> {
19071907
E->getInitializer().dump(OS);
19081908
PrintWithColorRAII(OS, ParenthesisColor) << ')';
19091909
}
1910+
/// Dump a regex literal: the common expression header labelled
/// "regex_literal_expr", then the literal's semantic expression, then the
/// closing parenthesis.
void visitRegexLiteralExpr(RegexLiteralExpr *E) {
1911+
printCommon(E, "regex_literal_expr");
1912+
// The semantic expression is printed as the node's only child.
printRec(E->getSemanticExpr());
1913+
// Emit the closing ')' using the parenthesis color.
PrintWithColorRAII(OS, ParenthesisColor) << ')';
1914+
}
19101915

19111916
void visitObjectLiteralExpr(ObjectLiteralExpr *E) {
19121917
printCommon(E, "object_literal")

lib/AST/ASTWalker.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,6 +1152,15 @@ class Traversal : public ASTVisitor<Traversal, Expr*, Stmt*,
11521152
return E;
11531153
}
11541154

1155+
/// Walk a regex literal by walking its semantic expression.
///
/// \returns \p E after storing the walked semantic expression back into it,
/// or nullptr when walking the semantic expression yields null.
Expr *visitRegexLiteralExpr(RegexLiteralExpr *E) {
1156+
if (auto *newExpr = doIt(E->getSemanticExpr())) {
1157+
// Store the result back, since doIt may return a different expression.
E->setSemanticExpr(newExpr);
1158+
} else {
1159+
// A null result from the child walk is propagated to the caller.
return nullptr;
1160+
}
1161+
return E;
1162+
}
1163+
11551164
//===--------------------------------------------------------------------===//
11561165
// Everything Else
11571166
//===--------------------------------------------------------------------===//

0 commit comments

Comments
 (0)