Skip to content

Commit c8a9482

Browse files
committed
Add TOK_GET_STRING_PREFIX macro and change tstring field to enum
1 parent a5e3bde commit c8a9482

File tree

6 files changed

+35
-25
lines changed

6 files changed

+35
-25
lines changed

Grammar/python.gram

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1225,7 +1225,7 @@ invalid_expression:
12251225
RAISE_SYNTAX_ERROR_KNOWN_LOCATION (a, "expected expression before 'if', but statement is given") }
12261226
| a='lambda' [lambda_params] b=':' &FSTRING_MIDDLE {
12271227
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "%c-string: lambda expressions are not allowed without parentheses",
1228-
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f') }
1228+
TOK_GET_STRING_PREFIX(p->tok)) }
12291229

12301230
invalid_named_expression(memo):
12311231
| a=expression ':=' expression {

Parser/action_helpers.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -966,7 +966,7 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
966966
return RAISE_SYNTAX_ERROR_KNOWN_RANGE(
967967
conv_token, conv,
968968
"%c-string: conversion type must come right after the exclamanation mark",
969-
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f'
969+
TOK_GET_STRING_PREFIX(p->tok)
970970
);
971971
}
972972

@@ -975,7 +975,7 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv)
975975
!(first == 's' || first == 'r' || first == 'a')) {
976976
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(conv,
977977
"%c-string: invalid conversion character %R: expected 's', 'r', or 'a'",
978-
TOK_GET_MODE(p->tok)->tstring ? 't' : 'f',
978+
TOK_GET_STRING_PREFIX(p->tok),
979979
conv->v.Name.id);
980980
return NULL;
981981
}
@@ -1295,7 +1295,7 @@ _PyPegen_decode_fstring_part(Parser* p, int is_raw, expr_ty constant, Token* tok
12951295
}
12961296

12971297
static asdl_expr_seq *
1298-
_get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b, int tstring)
1298+
_get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b, enum string_kind_t string_kind)
12991299
{
13001300
Py_ssize_t n_items = asdl_seq_LEN(raw_expressions);
13011301
Py_ssize_t total_items = n_items;
@@ -1329,8 +1329,9 @@ _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b
13291329
asdl_expr_seq *values = item->v.JoinedStr.values;
13301330
if (asdl_seq_LEN(values) != 2) {
13311331
PyErr_Format(PyExc_SystemError,
1332-
tstring ? "unexpected TemplateStr node without debug data in t-string at line %d"
1333-
: "unexpected JoinedStr node without debug data in f-string at line %d",
1332+
string_kind == TSTRING
1333+
? "unexpected TemplateStr node without debug data in t-string at line %d"
1334+
: "unexpected JoinedStr node without debug data in f-string at line %d",
13341335
item->lineno);
13351336
return NULL;
13361337
}
@@ -1340,7 +1341,7 @@ _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b
13401341
asdl_seq_SET(seq, index++, first);
13411342

13421343
expr_ty second = asdl_seq_GET(values, 1);
1343-
assert((tstring && second->kind == Interpolation_kind) || second->kind == FormattedValue_kind);
1344+
assert((string_kind == TSTRING && second->kind == Interpolation_kind) || second->kind == FormattedValue_kind);
13441345
asdl_seq_SET(seq, index++, second);
13451346

13461347
continue;
@@ -1382,7 +1383,7 @@ _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b
13821383
expr_ty
13831384
_PyPegen_template_str(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b) {
13841385

1385-
asdl_expr_seq *resized_exprs = _get_resized_exprs(p, a, raw_expressions, b, 1);
1386+
asdl_expr_seq *resized_exprs = _get_resized_exprs(p, a, raw_expressions, b, TSTRING);
13861387
return _PyAST_TemplateStr(resized_exprs, a->lineno, a->col_offset,
13871388
b->end_lineno, b->end_col_offset,
13881389
p->arena);
@@ -1391,7 +1392,7 @@ _PyPegen_template_str(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token
13911392
expr_ty
13921393
_PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b) {
13931394

1394-
asdl_expr_seq *resized_exprs = _get_resized_exprs(p, a, raw_expressions, b, 0);
1395+
asdl_expr_seq *resized_exprs = _get_resized_exprs(p, a, raw_expressions, b, FSTRING);
13951396
return _PyAST_JoinedStr(resized_exprs, a->lineno, a->col_offset,
13961397
b->end_lineno, b->end_col_offset,
13971398
p->arena);

Parser/lexer/lexer.c

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
3838
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
3939
#endif
4040

41+
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
4142
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
4243
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
4344
_PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
@@ -113,7 +114,7 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
113114
assert(c == '}' || c == ':' || c == '!');
114115
tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
115116

116-
if (!(tok_mode->f_string_debug || tok_mode->tstring) || token->metadata) {
117+
if (!(tok_mode->f_string_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
117118
return 0;
118119
}
119120
PyObject *res = NULL;
@@ -993,12 +994,12 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
993994
the_current_tok->in_format_spec = 0;
994995
the_current_tok->f_string_debug = 0;
995996

996-
int tstring = 0;
997+
enum string_kind_t string_kind = FSTRING;
997998
switch (*tok->start) {
998999
case 'T':
9991000
case 't':
10001001
the_current_tok->f_string_raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1001-
tstring = 1;
1002+
string_kind = TSTRING;
10021003
break;
10031004
case 'F':
10041005
case 'f':
@@ -1007,16 +1008,18 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
10071008
case 'R':
10081009
case 'r':
10091010
the_current_tok->f_string_raw = 1;
1010-
tstring = Py_TOLOWER(*(tok->start + 1)) == 't';
1011+
if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1012+
string_kind = TSTRING;
1013+
}
10111014
break;
10121015
default:
10131016
Py_UNREACHABLE();
10141017
}
10151018

1016-
the_current_tok->tstring = tstring;
1019+
the_current_tok->string_kind = string_kind;
10171020
the_current_tok->curly_bracket_depth = 0;
10181021
the_current_tok->curly_bracket_expr_start_depth = -1;
1019-
return tstring ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1022+
return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
10201023
}
10211024

10221025
letter_quote:
@@ -1079,7 +1082,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
10791082
if (the_current_tok->f_string_quote == quote &&
10801083
the_current_tok->f_string_quote_size == quote_size) {
10811084
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1082-
"%c-string: expecting '}'", TOK_GET_MODE(tok)->tstring ? 't' : 'f'));
1085+
"%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
10831086
}
10841087
}
10851088

@@ -1209,7 +1212,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
12091212
case '}':
12101213
if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
12111214
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1212-
"%c-string: single '}' is not allowed", TOK_GET_MODE(tok)->tstring ? 't' : 'f'));
1215+
"%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
12131216
}
12141217
if (!tok->tok_extra_tokens && !tok->level) {
12151218
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
@@ -1230,7 +1233,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
12301233
int previous_bracket = current_tok->curly_bracket_depth - 1;
12311234
if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
12321235
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1233-
"%c-string: unmatched '%c'", TOK_GET_MODE(tok)->tstring ? 't' : 'f', c));
1236+
"%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
12341237
}
12351238
}
12361239
if (tok->parenlinenostack[tok->level] != tok->lineno) {
@@ -1252,7 +1255,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
12521255
current_tok->curly_bracket_depth--;
12531256
if (current_tok->curly_bracket_depth < 0) {
12541257
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1255-
TOK_GET_MODE(tok)->tstring ? 't' : 'f', c));
1258+
TOK_GET_STRING_PREFIX(tok), c));
12561259
}
12571260
if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
12581261
current_tok->curly_bracket_expr_start_depth--;
@@ -1303,7 +1306,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
13031306
current_tok->curly_bracket_expr_start_depth++;
13041307
if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
13051308
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1306-
"%c-string: expressions nested too deeply", TOK_GET_MODE(tok)->tstring ? 't' : 'f'));
1309+
"%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
13071310
}
13081311
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
13091312
return tok_get_normal_mode(tok, current_tok, token);
@@ -1383,7 +1386,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
13831386
_PyTokenizer_syntaxerror(tok,
13841387
"unterminated triple-quoted %c-string literal"
13851388
" (detected at line %d)",
1386-
TOK_GET_MODE(tok)->tstring ? 't' : 'f', start);
1389+
TOK_GET_STRING_PREFIX(tok), start);
13871390
if (c != '\n') {
13881391
tok->done = E_EOFS;
13891392
}
@@ -1392,7 +1395,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
13921395
else {
13931396
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
13941397
"unterminated %c-string literal (detected at"
1395-
" line %d)", TOK_GET_MODE(tok)->tstring ? 't' : 'f', start));
1398+
" line %d)", TOK_GET_STRING_PREFIX(tok), start));
13961399
}
13971400
}
13981401

@@ -1414,7 +1417,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
14141417
current_tok->curly_bracket_expr_start_depth++;
14151418
if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
14161419
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1417-
"%c-string: expressions nested too deeply", TOK_GET_MODE(tok)->tstring ? 't' : 'f'));
1420+
"%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
14181421
}
14191422
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
14201423
current_tok->in_format_spec = 0;

Parser/lexer/state.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ enum tokenizer_mode_kind_t {
3636
TOK_FSTRING_MODE,
3737
};
3838

39+
enum string_kind_t {
40+
FSTRING,
41+
TSTRING,
42+
};
43+
3944
#define MAX_EXPR_NESTING 3
4045

4146
typedef struct _tokenizer_mode {
@@ -60,7 +65,7 @@ typedef struct _tokenizer_mode {
6065
int f_string_debug;
6166
int in_format_spec;
6267

63-
int tstring;
68+
enum string_kind_t string_kind;
6469
} tokenizer_mode;
6570

6671
/* Tokenizer state */

Parser/parser.c

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Parser/pegen.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#define CURRENT_POS (-5)
2828

2929
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
30+
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
3031

3132
typedef struct _memo {
3233
int type;

0 commit comments

Comments
 (0)