Skip to content

Commit 67136c8

Browse files
lihaoyi and claude committed
Add support for '''-delimited strings (dedented string literals) in Scala 3
- Add new token types: tDEDENTED_STRING, tINTERPOLATED_DEDENTED_STRING, tINTERPOLATED_DEDENTED_RAW_STRING
- Update lexer grammar to recognize ''' delimiters with extended delimiter support
- Add INSIDE_DEDENTED_INTERPOLATED_STRING state and DedentedLevel class
- Support string interpolation for dedented strings
- Add comprehensive test coverage for basic, extended, and interpolated dedented strings
- Regenerated _ScalaCoreLexer.java from updated flex grammar

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 2b206f3 commit 67136c8

File tree

6 files changed

+1452
-1094
lines changed

6 files changed

+1452
-1094
lines changed

scala/scala-impl/src/org/jetbrains/plugins/scala/lang/lexer/ScalaTokenTypes.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ public interface ScalaTokenTypes {
7575
IElementType tINTERPOLATED_MULTILINE_RAW_STRING = new ScalaTokenType("interpolated multiline raw string");
7676
IElementType tINTERPOLATED_RAW_STRING = new ScalaTokenType("interpolated raw string");
7777

78+
// Dedented string literals (Scala 3)
79+
IElementType tDEDENTED_STRING = new ScalaTokenType("dedented string");
80+
IElementType tINTERPOLATED_DEDENTED_STRING = new ScalaTokenType("interpolated dedented string");
81+
IElementType tINTERPOLATED_DEDENTED_RAW_STRING = new ScalaTokenType("interpolated dedented raw string");
82+
7883
IElementType tCHAR = new ScalaTokenType("Character");
7984
IElementType tSYMBOL = new ScalaTokenType("Symbol");
8085

@@ -267,7 +272,9 @@ public interface ScalaTokenTypes {
267272
tMULTILINE_STRING,
268273
tINTERPOLATED_STRING,
269274
tINTERPOLATED_MULTILINE_STRING,
270-
tINTERPOLATED_STRING_END
275+
tINTERPOLATED_STRING_END,
276+
tDEDENTED_STRING,
277+
tINTERPOLATED_DEDENTED_STRING
271278
);
272279

273280
TokenSet VAL_VAR_TOKEN_SET = TokenSet.create(kVAL, kVAR);

scala/scala-impl/src/org/jetbrains/plugins/scala/lang/lexer/core/_ScalaCoreLexer.flex

Lines changed: 83 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,15 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
8686
}
8787
}
8888

89+
private static class DedentedLevel extends InterpolatedStringLevel {
90+
public DedentedLevel(CharSequence intepolator) {
91+
super(intepolator);
92+
}
93+
public int getState() {
94+
return INSIDE_DEDENTED_INTERPOLATED_STRING;
95+
}
96+
}
97+
8998
private boolean isScala3;
9099

91100
//
@@ -94,6 +103,7 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
94103
//to get id after $ in interpolated String
95104
private boolean haveIdInString = false;
96105
private boolean haveIdInMultilineString = false;
106+
private boolean haveIdInDedentedString = false;
97107
// Currently opened interpolated strings. Each entry represents the number of opened left structural braces in that string
98108
private Stack<InterpolatedStringLevel> nestedString = new Stack<>();
99109
private CharSequence lastSeenInterpolator = null;
@@ -105,6 +115,7 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
105115
public void resetCustom() {
106116
haveIdInString = false;
107117
haveIdInMultilineString = false;
118+
haveIdInDedentedString = false;
108119
nestedString.clear();
109120
lastSeenInterpolator = null;
110121
}
@@ -113,8 +124,10 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
113124
return isInsideInterpolatedString() ||
114125
haveIdInString ||
115126
haveIdInMultilineString ||
127+
haveIdInDedentedString ||
116128
yystate() == INSIDE_INTERPOLATED_STRING ||
117-
yystate() == INSIDE_MULTI_LINE_INTERPOLATED_STRING;
129+
yystate() == INSIDE_MULTI_LINE_INTERPOLATED_STRING ||
130+
yystate() == INSIDE_DEDENTED_INTERPOLATED_STRING;
118131
}
119132

120133
private boolean shouldProcessBracesForInterpolated() {
@@ -145,6 +158,9 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
145158
} else if (haveIdInMultilineString) {
146159
haveIdInMultilineString = false;
147160
yybegin(INSIDE_MULTI_LINE_INTERPOLATED_STRING);
161+
} else if (haveIdInDedentedString) {
162+
haveIdInDedentedString = false;
163+
yybegin(INSIDE_DEDENTED_INTERPOLATED_STRING);
148164
}
149165
}
150166

@@ -158,6 +174,8 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
158174
typeAdjusted = tINTERPOLATED_RAW_STRING;
159175
else if (type == tINTERPOLATED_MULTILINE_STRING && isInsideRawInterpolator())
160176
typeAdjusted = tINTERPOLATED_MULTILINE_RAW_STRING;
177+
else if (type == tINTERPOLATED_DEDENTED_STRING && isInsideRawInterpolator())
178+
typeAdjusted = tINTERPOLATED_DEDENTED_RAW_STRING;
161179
else
162180
typeAdjusted = type;
163181

@@ -166,6 +184,11 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
166184

167185
@NotNull
168186
private IElementType processDollarInsideString(boolean isInsideMultiline) {
187+
return processDollarInsideString(isInsideMultiline, false);
188+
}
189+
190+
@NotNull
191+
private IElementType processDollarInsideString(boolean isInsideMultiline, boolean isInsideDedented) {
169192
final IElementType token;
170193

171194
// TODO: remove this check, this should always be false, because $$ is handled by INTERPOLATED_STRING_ESCAPE pattern earlier
@@ -175,7 +198,9 @@ import static org.jetbrains.plugins.scala.lang.lexer.ScalaTokenTypes.*;
175198
token = tINTERPOLATED_STRING_ESCAPE;
176199
}
177200
else {
178-
if (isInsideMultiline) {
201+
if (isInsideDedented) {
202+
haveIdInDedentedString = true;
203+
} else if (isInsideMultiline) {
179204
haveIdInMultilineString = true;
180205
} else {
181206
haveIdInString = true;
@@ -277,6 +302,9 @@ STRING_BEGIN = \"([^\\\"\r\n]|{CHAR_ESCAPE_SEQUENCE})*
277302
STRING_LITERAL={STRING_BEGIN} \"
278303
MULTI_LINE_STRING = \"\"\" ( (\"(\")?)? [^\"] )* \"\"\" (\")* // Multi-line string
279304

305+
// Dedented string literals (Scala 3) - uses single quotes
306+
DEDENTED_STRING = \'\'\'+ ( (\'(\')?)? [^\'] )* \'\'\'+ // Dedented multiline string
307+
280308
////////String Interpolation////////
281309
INTERPOLATED_STRING_ID = {varid}
282310

@@ -287,6 +315,9 @@ INTERPOLATED_STRING_PART_NOT_ESCAPED = [^\\\"\r\n\$]
287315
INTERPOLATED_MULTI_LINE_STRING_BEGIN = \"\"\"{INTERPOLATED_MULTI_LINE_STRING_PART}*
288316
INTERPOLATED_MULTI_LINE_STRING_PART = ((\"(\")?)? [^\"\$])
289317

318+
INTERPOLATED_DEDENTED_STRING_BEGIN = \'\'\'+ {INTERPOLATED_DEDENTED_STRING_PART}*
319+
INTERPOLATED_DEDENTED_STRING_PART = ((\'(\')?)? [^\'\$])
320+
290321
// TODO: rename, it's misleading
291322
INTERPOLATED_STRING_ESCAPE = "$$"
292323
//INTERPOLATED_STRING_VARIABLE = "$"({identifier})
@@ -324,6 +355,7 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
324355
%xstate WAIT_FOR_INTERPOLATED_STRING
325356
%xstate INSIDE_INTERPOLATED_STRING
326357
%xstate INSIDE_MULTI_LINE_INTERPOLATED_STRING
358+
%xstate INSIDE_DEDENTED_INTERPOLATED_STRING
327359
%xstate INJ_COMMON_STATE
328360

329361
%%
@@ -344,7 +376,7 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
344376
{END_OF_LINE_COMMENT} { return process(tLINE_COMMENT); }
345377

346378

347-
{INTERPOLATED_STRING_ID} / ({INTERPOLATED_STRING_BEGIN} | {INTERPOLATED_MULTI_LINE_STRING_BEGIN}) {
379+
{INTERPOLATED_STRING_ID} / ({INTERPOLATED_STRING_BEGIN} | {INTERPOLATED_MULTI_LINE_STRING_BEGIN} | {INTERPOLATED_DEDENTED_STRING_BEGIN}) {
348380
yybegin(WAIT_FOR_INTERPOLATED_STRING);
349381
// TODO: remove this check: it looks like dead code,
350382
// yytext() should only return text that is matched by INTERPOLATED_STRING_ID, which can't end with \"\"
@@ -367,6 +399,12 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
367399
nestedString.push(new MultilineLevel(lastSeenInterpolator));
368400
return process(tINTERPOLATED_MULTILINE_STRING);
369401
}
402+
403+
{INTERPOLATED_DEDENTED_STRING_BEGIN} {
404+
yybegin(INSIDE_DEDENTED_INTERPOLATED_STRING);
405+
nestedString.push(new DedentedLevel(lastSeenInterpolator));
406+
return process(tINTERPOLATED_DEDENTED_STRING);
407+
}
370408
}
371409

372410
<INJ_COMMON_STATE> {identifier} {
@@ -470,6 +508,46 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
470508
}
471509
}
472510

511+
<INSIDE_DEDENTED_INTERPOLATED_STRING> {
512+
{INTERPOLATED_STRING_ESCAPE} {
513+
return process(tINTERPOLATED_STRING_ESCAPE);
514+
}
515+
516+
(\'\') / "$" {
517+
return process(tINTERPOLATED_DEDENTED_STRING);
518+
}
519+
520+
{INTERPOLATED_DEDENTED_STRING_PART}+ {
521+
return process(tINTERPOLATED_DEDENTED_STRING);
522+
}
523+
524+
"$"{identifier} {
525+
return processDollarInsideString(false, true);
526+
}
527+
528+
\'\'\'+ (\')+ {
529+
yypushback(yylength() - 1);
530+
return process(tINTERPOLATED_DEDENTED_STRING);
531+
}
532+
533+
\'\'\'+ {
534+
return processOutsideString();
535+
}
536+
537+
"$" / "{" {
538+
yybegin(COMMON_STATE);
539+
return process(tINTERPOLATED_STRING_INJECTION);
540+
}
541+
542+
\' / [^\'] {
543+
return process(tINTERPOLATED_DEDENTED_STRING);
544+
}
545+
546+
[^] {
547+
return process(tWRONG_STRING);
548+
}
549+
}
550+
473551

474552
"/**" ("*"? [^\/])* "*/" { //for comments in interpolated strings
475553
return process(ScalaDocElementTypes.SCALA_DOC_COMMENT);
@@ -483,6 +561,8 @@ XML_BEGIN = "<" ("_" | [:jletter:]) | "<!--" | "<?" ("_" | [:jletter:]) | "<![CD
483561

484562
{MULTI_LINE_STRING} { return process(tMULTILINE_STRING); }
485563

564+
{DEDENTED_STRING} { if (isScala3) return process(tDEDENTED_STRING); else return process(tIDENTIFIER); }
565+
486566
// TODO: incomplete strings should be handled the same way with interpolated strings
487567
// what can be parsed should be parsed as tSTRING,
488568
// tWRONG_LINE_BREAK_IN_STRING error token should be added at unexpected new line should

0 commit comments

Comments
 (0)