Skip to content

Commit 2addef7

Browse files
committed
Add new tokens TSTRING_MIDDLE & TSTRING_END
1 parent 3e10c49 commit 2addef7

File tree

10 files changed

+305
-234
lines changed

10 files changed

+305
-234
lines changed

Doc/library/token.rst

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,30 @@ The token constants are:
142142

143143
.. versionadded:: next
144144

145+
.. data:: TSTRING_MIDDLE
146+
147+
Token value used for literal text inside a t-string literal,
148+
including format specifications.
149+
150+
.. impl-detail::
151+
152+
Replacement fields (that is, the non-literal parts of t-strings) use
153+
the same tokens as other expressions, and are delimited by
154+
:data:`LBRACE`, :data:`RBRACE`, :data:`EXCLAMATION` and :data:`COLON`
155+
tokens.
156+
157+
.. versionadded:: next
158+
159+
.. data:: TSTRING_END
160+
161+
Token value used to indicate the end of a t-string.
162+
163+
.. impl-detail::
164+
165+
The token string contains the closing quote(s).
166+
167+
.. versionadded:: next
168+
145169
.. data:: ENDMARKER
146170

147171
Token value that indicates the end of input.

Grammar/Tokens

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,11 @@ TYPE_IGNORE
6060
TYPE_COMMENT
6161
SOFT_KEYWORD
6262
FSTRING_START
63-
TSTRING_START
6463
FSTRING_MIDDLE
6564
FSTRING_END
65+
TSTRING_START
66+
TSTRING_MIDDLE
67+
TSTRING_END
6668
COMMENT
6769
NL
6870
ERRORTOKEN

Grammar/python.gram

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -945,7 +945,7 @@ tstring_format_spec_replacement_field[expr_ty]:
945945
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
946946
| invalid_tstring_replacement_field
947947
tstring_format_spec[expr_ty]:
948-
| t=FSTRING_MIDDLE { _PyPegen_decoded_constant_from_token(p, t) }
948+
| t=TSTRING_MIDDLE { _PyPegen_decoded_constant_from_token(p, t) }
949949
| tstring_format_spec_replacement_field
950950
tstring_full_format_spec[ResultTokenWithMetadata*]:
951951
| colon=':' spec=tstring_format_spec* { _PyPegen_setup_full_format_spec(p, colon, (asdl_expr_seq *) spec, EXTRA) }
@@ -955,9 +955,9 @@ tstring_replacement_field[expr_ty]:
955955
| invalid_tstring_replacement_field
956956
tstring_middle[expr_ty]:
957957
| tstring_replacement_field
958-
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
958+
| t=TSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
959959
tstring[expr_ty] (memo):
960-
| a=TSTRING_START b=tstring_middle* c=FSTRING_END { _PyPegen_template_str(p, a, (asdl_expr_seq*)b, c) }
960+
| a=TSTRING_START b=tstring_middle* c=TSTRING_END { _PyPegen_template_str(p, a, (asdl_expr_seq*)b, c) }
961961

962962
string[expr_ty]: s[Token*]=STRING { _PyPegen_constant_from_string(p, s) }
963963
strings[expr_ty] (memo): a[asdl_expr_seq*]=(fstring|string|tstring)+ { _PyPegen_concatenate_strings(p, a, EXTRA) }
@@ -1224,8 +1224,9 @@ invalid_expression:
12241224
| a[stmt_ty]=(pass_stmt|break_stmt|continue_stmt) 'if' b=disjunction 'else' c=simple_stmt {
12251225
RAISE_SYNTAX_ERROR_KNOWN_LOCATION (a, "expected expression before 'if', but statement is given") }
12261226
| a='lambda' [lambda_params] b=':' &FSTRING_MIDDLE {
1227-
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "%c-string: lambda expressions are not allowed without parentheses",
1228-
TOK_GET_STRING_PREFIX(p->tok)) }
1227+
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "f-string: lambda expressions are not allowed without parentheses") }
1228+
| a='lambda' [lambda_params] b=':' &TSTRING_MIDDLE {
1229+
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "t-string: lambda expressions are not allowed without parentheses") }
12291230

12301231
invalid_named_expression(memo):
12311232
| a=expression ':=' expression {

Include/internal/pycore_token.h

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,15 @@ extern "C" {
7373
#define TYPE_COMMENT 57
7474
#define SOFT_KEYWORD 58
7575
#define FSTRING_START 59
76-
#define TSTRING_START 60
77-
#define FSTRING_MIDDLE 61
78-
#define FSTRING_END 62
79-
#define COMMENT 63
80-
#define NL 64
81-
#define ERRORTOKEN 65
82-
#define N_TOKENS 67
76+
#define FSTRING_MIDDLE 60
77+
#define FSTRING_END 61
78+
#define TSTRING_START 62
79+
#define TSTRING_MIDDLE 63
80+
#define TSTRING_END 64
81+
#define COMMENT 65
82+
#define NL 66
83+
#define ERRORTOKEN 67
84+
#define N_TOKENS 69
8385
#define NT_OFFSET 256
8486

8587
/* Special definitions for cooperation with parser */
@@ -92,7 +94,8 @@ extern "C" {
9294
(x) == INDENT || \
9395
(x) == DEDENT)
9496
#define ISSTRINGLIT(x) ((x) == STRING || \
95-
(x) == FSTRING_MIDDLE)
97+
(x) == FSTRING_MIDDLE || \
98+
(x) == TSTRING_MIDDLE)
9699

97100

98101
// Export these 4 symbols for 'test_peg_generator'

Lib/token.py

Lines changed: 10 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/tokenize.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ def untokenize(self, iterable):
251251
self.tokens.append(indent)
252252
self.prev_col = len(indent)
253253
startline = False
254-
elif tok_type == FSTRING_MIDDLE:
254+
elif tok_type in (FSTRING_MIDDLE, TSTRING_MIDDLE):
255255
if '{' in token or '}' in token:
256256
token = self.escape_brackets(token)
257257
last_line = token.splitlines()[-1]
@@ -308,7 +308,7 @@ def compat(self, token, iterable):
308308
elif startline and indents:
309309
toks_append(indents[-1])
310310
startline = False
311-
elif toknum == FSTRING_MIDDLE:
311+
elif toknum in (FSTRING_MIDDLE, TSTRING_MIDDLE):
312312
tokval = self.escape_brackets(tokval)
313313

314314
# Insert a space between two consecutive brackets if we are in an f-string

Parser/lexer/lexer.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
3838
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
3939
#endif
4040

41+
/* Select the *_MIDDLE token type matching the string kind of `tok_mode`
 * (a pointer to a tokenizer mode): TSTRING_MIDDLE when string_kind is
 * TSTRING, FSTRING_MIDDLE otherwise.  The argument is parenthesized so that
 * any pointer expression (e.g. `&mode`) expands correctly. */
#define FTSTRING_MIDDLE(tok_mode) ((tok_mode)->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42+
/* Select the *_END token type matching the string kind of `tok_mode`
 * (a pointer to a tokenizer mode): TSTRING_END when string_kind is TSTRING,
 * FSTRING_END otherwise.  The argument is parenthesized so that any pointer
 * expression (e.g. `&mode`) expands correctly. */
#define FTSTRING_END(tok_mode) ((tok_mode)->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
4143
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
4244
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
4345
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
@@ -1335,7 +1337,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
13351337
p_start = tok->start;
13361338
p_end = tok->cur;
13371339
tok->tok_mode_stack_index--;
1338-
return MAKE_TOKEN(FSTRING_END);
1340+
return MAKE_TOKEN(FTSTRING_END(current_tok));
13391341

13401342
f_string_middle:
13411343

@@ -1375,7 +1377,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
13751377
current_tok->in_format_spec = 0;
13761378
p_start = tok->start;
13771379
p_end = tok->cur;
1378-
return MAKE_TOKEN(FSTRING_MIDDLE);
1380+
return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
13791381
}
13801382

13811383
assert(tok->multi_line_start != NULL);
@@ -1435,12 +1437,12 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
14351437
p_start = tok->start;
14361438
p_end = tok->cur - 1;
14371439
}
1438-
return MAKE_TOKEN(FSTRING_MIDDLE);
1440+
return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
14391441
} else if (c == '}') {
14401442
if (unicode_escape) {
14411443
p_start = tok->start;
14421444
p_end = tok->cur;
1443-
return MAKE_TOKEN(FSTRING_MIDDLE);
1445+
return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
14441446
}
14451447
int peek = tok_nextc(tok);
14461448

@@ -1460,7 +1462,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
14601462
p_start = tok->start;
14611463
p_end = tok->cur;
14621464
}
1463-
return MAKE_TOKEN(FSTRING_MIDDLE);
1465+
return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
14641466
} else if (c == '\\') {
14651467
int peek = tok_nextc(tok);
14661468
if (peek == '\r') {
@@ -1502,7 +1504,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
15021504
}
15031505
p_start = tok->start;
15041506
p_end = tok->cur;
1505-
return MAKE_TOKEN(FSTRING_MIDDLE);
1507+
return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
15061508
}
15071509

15081510
static int

0 commit comments

Comments
 (0)