@@ -1311,8 +1311,8 @@ unicodewriter_write_line(Parser *p, PyUnicodeWriter *w, const char *line_start,
13111311}
13121312
13131313static PyObject *
1314- _PyPegen_dedent_string_part (Parser * p , const char * s , size_t len , int indent_char , Py_ssize_t dedent_count ,
1315- int is_raw , int is_first , expr_ty constant , Token * token )
1314+ _PyPegen_dedent_string_part (Parser * p , const char * s , size_t len , const char * indent , Py_ssize_t indent_len ,
1315+ int is_first , int is_raw , expr_ty constant , Token * token )
13161316{
13171317 Py_ssize_t lineno = constant -> lineno ;
13181318 const char * line_start = s ;
@@ -1350,7 +1350,7 @@ _PyPegen_dedent_string_part(Parser *p, const char *s, size_t len, int indent_cha
13501350 lineno ++ ;
13511351
13521352 Py_ssize_t i = 0 ;
1353- while (line_start + i < s_end && i < dedent_count && line_start [i ] == indent_char ) {
1353+ while (line_start + i < s_end && i < indent_len && line_start [i ] == indent [ i ] ) {
13541354 i ++ ;
13551355 }
13561356
@@ -1365,8 +1365,8 @@ _PyPegen_dedent_string_part(Parser *p, const char *s, size_t len, int indent_cha
13651365 line_start += i + 1 ;
13661366 continue ;
13671367 }
1368- if (i < dedent_count ) { // found an invalid indent.
1369- assert (line_start [i ] != indent_char );
1368+ if (i < indent_len ) { // found an invalid indent.
1369+ assert (line_start [i ] != indent [ i ] );
13701370 PyUnicodeWriter_Discard (w );
13711371 RAISE_ERROR_KNOWN_LOCATION (p , PyExc_SyntaxError , lineno , i , lineno , i + 1 ,
13721372 "d-string line missing valid indentation" );
@@ -1392,7 +1392,10 @@ _PyPegen_dedent_string_part(Parser *p, const char *s, size_t len, int indent_cha
13921392}
13931393
13941394static expr_ty
1395- _PyPegen_decode_fstring_part (Parser * p , int is_first , int is_raw , int indent_char , Py_ssize_t dedent_count , expr_ty constant , Token * token ) {
1395+ _PyPegen_decode_fstring_part (Parser * p , int is_first , int is_raw ,
1396+ const char * indent , Py_ssize_t indent_len ,
1397+ expr_ty constant , Token * token )
1398+ {
13961399 assert (PyUnicode_CheckExact (constant -> v .Constant .value ));
13971400
13981401 const char * bstr = PyUnicode_AsUTF8 (constant -> v .Constant .value );
@@ -1402,9 +1405,9 @@ _PyPegen_decode_fstring_part(Parser* p, int is_first, int is_raw, int indent_cha
14021405 is_raw = is_raw || strchr (bstr , '\\' ) == NULL ;
14031406
14041407 PyObject * str = NULL ;
1405- if (dedent_count > 0 ) {
1406- str = _PyPegen_dedent_string_part (p , bstr , strlen (bstr ), indent_char , dedent_count ,
1407- is_raw , is_first , constant , token );
1408+ if (indent_len > 0 ) {
1409+ str = _PyPegen_dedent_string_part (p , bstr , strlen (bstr ), indent , indent_len ,
1410+ is_first , is_raw , constant , token );
14081411 }
14091412 else {
14101413 str = _PyPegen_decode_string (p , is_raw , bstr , strlen (bstr ), token );
@@ -1423,6 +1426,14 @@ _PyPegen_decode_fstring_part(Parser* p, int is_first, int is_raw, int indent_cha
14231426 p -> arena );
14241427}
14251428
1429+ /* defined in unicodeobject.c */
1430+ extern Py_ssize_t
1431+ _Py_search_longest_common_leading_whitespace (
1432+ const char * const src ,
1433+ const char * const end ,
1434+ const char * * output
1435+ );
1436+
14261437static asdl_expr_seq *
14271438_get_resized_exprs (Parser * p , Token * a , asdl_expr_seq * raw_expressions , Token * b , enum string_kind_t string_kind )
14281439{
@@ -1441,14 +1452,15 @@ _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b
14411452 }
14421453 int is_raw = strpbrk (quote_str , "rR" ) != NULL ;
14431454 int is_dedent = strpbrk (quote_str , "dD" ) != NULL ;
1444- int indent_char = 0 ;
1445- Py_ssize_t indent_count = 0 ;
14461455
14471456 asdl_expr_seq * seq = _Py_asdl_expr_seq_new (total_items , p -> arena );
14481457 if (seq == NULL ) {
14491458 return NULL ;
14501459 }
14511460
1461+ const char * common_indent_start = NULL ;
1462+ Py_ssize_t common_indent_len = 0 ;
1463+
14521464 if (is_dedent ) {
14531465 expr_ty first_item = asdl_seq_GET (raw_expressions , 0 );
14541466 if (first_item -> kind != Constant_kind
@@ -1460,52 +1472,52 @@ _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b
14601472 return NULL ;
14611473 }
14621474
1463- expr_ty last_item = asdl_seq_GET (raw_expressions , n_items - 1 );
1464- if (last_item -> kind != Constant_kind ) {
1465- RAISE_SYNTAX_ERROR_KNOWN_LOCATION (
1466- last_item ,
1467- "d-string must end with an indent line"
1468- );
1475+ // Instead of calculating the common indent from the individual parts directly,
1476+ // build a temporary string and calculate the common indent from it.
1477+ PyBytesWriter * w = PyBytesWriter_Create (0 );
1478+ if (w == NULL ) {
14691479 return NULL ;
14701480 }
14711481
1472- Py_ssize_t blen ;
1473- const char * bstr = PyUnicode_AsUTF8AndSize (last_item -> v .Constant .value , & blen );
1474- if (bstr == NULL ) {
1475- return NULL ;
1476- }
1482+ for (Py_ssize_t i = 0 ; i < n_items ; i ++ ) {
1483+ expr_ty item = asdl_seq_GET (raw_expressions , i );
14771484
1478- // memrchr is GNU extension; use manual loop for portability.
1479- const char * lastline = bstr + blen ;
1480- while (bstr < lastline ) {
1481- if (lastline [-1 ] == '\n' ) {
1482- break ;
1483- }
1484- lastline -- ;
1485- if (* lastline != ' ' && * lastline != '\t' ) {
1486- RAISE_SYNTAX_ERROR_KNOWN_LOCATION (
1487- last_item ,
1488- "d-string must end with an indent line"
1489- );
1490- return NULL ;
1485+ if (item -> kind == JoinedStr_kind ) {
1486+ // Write a placeholder.
1487+ if (PyBytesWriter_WriteBytes (w , "X" , 1 ) < 0 ) {
1488+ PyBytesWriter_Discard (w );
1489+ return NULL ;
1490+ }
1491+ continue ;
14911492 }
1492- }
1493-
1494- // checks indent of the last line.
1495- indent_count = bstr + blen - lastline ;
1496- if (indent_count > 0 ) {
1497- indent_char = lastline [0 ];
1498-
1499- for (Py_ssize_t i = 1 ; i < indent_count ; i ++ ) {
1500- if (lastline [i ] != indent_char ) {
1501- RAISE_ERROR_KNOWN_LOCATION (
1502- p , PyExc_TabError , last_item -> end_lineno , i , last_item -> end_lineno , i + 1 ,
1503- "inconsistent use of tabs and spaces in indentation"
1504- );
1493+ if (item -> kind == Constant_kind ) {
1494+ Py_ssize_t blen ;
1495+ const char * bstr = PyUnicode_AsUTF8AndSize (item -> v .Constant .value , & blen );
1496+ if (bstr == NULL || PyBytesWriter_WriteBytes (w , bstr , blen ) < 0 ) {
1497+ PyBytesWriter_Discard (w );
15051498 return NULL ;
15061499 }
1500+ continue ;
15071501 }
15081502 }
1503+ // Add a terminator to include the last line before the ending quote
1504+ if (PyBytesWriter_WriteBytes (w , "X" , 1 ) < 0 ) {
1505+ PyBytesWriter_Discard (w );
1506+ return NULL ;
1507+ }
1508+
1509+ // TODO: instead of creating temp_bytes, we could search for the
1510+ // common indent in each part directly. But this needs a reimplementation
1511+ // of _Py_search_longest_common_leading_whitespace.
1512+ PyObject * temp_bytes = PyBytesWriter_Finish (w );
1513+ if (temp_bytes == NULL ) {
1514+ return NULL ;
1515+ }
1516+ _PyArena_AddPyObject (p -> arena , temp_bytes );
1517+ const char * temp_str = PyBytes_AsString (temp_bytes );
1518+ const char * temp_end = temp_str + PyBytes_GET_SIZE (temp_bytes );
1519+ common_indent_len = _Py_search_longest_common_leading_whitespace (
1520+ temp_str , temp_end , & common_indent_start );
15091521 }
15101522
15111523 Py_ssize_t index = 0 ;
@@ -1539,7 +1551,7 @@ _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b
15391551 }
15401552
15411553 if (item -> kind == Constant_kind ) {
1542- item = _PyPegen_decode_fstring_part (p , i == 0 , is_raw , indent_char , indent_count , item , b );
1554+ item = _PyPegen_decode_fstring_part (p , i == 0 , is_raw , common_indent_start , common_indent_len , item , b );
15431555 if (item == NULL ) {
15441556 return NULL ;
15451557 }
0 commit comments