@@ -1292,24 +1292,124 @@ _PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq
12921292
12931293// Fstring stuff
12941294
1295+ static int
1296+ unicodewriter_write_line (Parser * p , PyUnicodeWriter * w , const char * line_start , const char * line_end ,
1297+ int is_raw , Token * token )
1298+ {
1299+ if (is_raw || memchr (line_start , '\\' , line_end - line_start ) == NULL ) {
1300+ return PyUnicodeWriter_WriteUTF8 (w , line_start , line_end - line_start );
1301+ }
1302+ else {
1303+ PyObject * line = _PyPegen_decode_string (p , 1 , line_start , line_end - line_start , token );
1304+ if (line == NULL || PyUnicodeWriter_WriteStr (w , line ) < 0 ) {
1305+ Py_XDECREF (line );
1306+ return -1 ;
1307+ }
1308+ Py_DECREF (line );
1309+ }
1310+ return 0 ;
1311+ }
1312+
1313+ static PyObject *
1314+ _PyPegen_dedent_string_part (Parser * p , const char * s , size_t len , int indent_char , Py_ssize_t dedent_count ,
1315+ int is_raw , int is_first , expr_ty constant , Token * token )
1316+ {
1317+ Py_ssize_t lineno = constant -> lineno ;
1318+ const char * line_start = s ;
1319+ const char * s_end = s + len ;
1320+
1321+ PyUnicodeWriter * w = PyUnicodeWriter_Create (len );
1322+ if (w == NULL ) {
1323+ return NULL ;
1324+ }
1325+ if (is_first ) {
1326+ assert (line_start [0 ] == '\n' );
1327+ line_start ++ ; // skip the first newline
1328+ }
1329+ else {
1330+ // Example: df"""
1331+ // first part {param} second part
1332+ // next line
1333+ // """"
1334+ // We don't need to dedent the first line in the non-first parts.
1335+ const char * line_end = memchr (line_start , '\n' , s_end - line_start );
1336+ if (line_end ) {
1337+ line_end ++ ; // include the newline
1338+ }
1339+ else {
1340+ line_end = s_end ;
1341+ }
1342+ if (unicodewriter_write_line (p , w , line_start , line_end , is_raw , token ) < 0 ) {
1343+ PyUnicodeWriter_Discard (w );
1344+ return NULL ;
1345+ }
1346+ line_start = line_end ;
1347+ }
1348+
1349+ while (line_start < s + len ) {
1350+ lineno ++ ;
1351+
1352+ Py_ssize_t i = 0 ;
1353+ while (line_start + i < s_end && i < dedent_count && line_start [i ] == indent_char ) {
1354+ i ++ ;
1355+ }
1356+
1357+ if (line_start [i ] == '\0' ) { // found an empty line without newline.
1358+ break ;
1359+ }
1360+ if (line_start [i ] == '\n' ) { // found an empty line with newline.
1361+ if (PyUnicodeWriter_WriteChar (w , '\n' ) < 0 ) {
1362+ PyUnicodeWriter_Discard (w );
1363+ return NULL ;
1364+ }
1365+ line_start += i + 1 ;
1366+ continue ;
1367+ }
1368+ if (i < dedent_count ) { // found an invalid indent.
1369+ assert (line_start [i ] != indent_char );
1370+ PyUnicodeWriter_Discard (w );
1371+ RAISE_ERROR_KNOWN_LOCATION (p , PyExc_SyntaxError , lineno , i , lineno , i + 1 ,
1372+ "d-string line missing valid indentation" );
1373+ return NULL ;
1374+ }
1375+
1376+ // found a indented line. let's dedent it.
1377+ line_start += i ;
1378+ const char * line_end = memchr (line_start , '\n' , s_end - line_start );
1379+ if (line_end ) {
1380+ line_end ++ ; // include the newline
1381+ }
1382+ else {
1383+ line_end = s_end ;
1384+ }
1385+ if (unicodewriter_write_line (p , w , line_start , line_end , is_raw , token ) < 0 ) {
1386+ PyUnicodeWriter_Discard (w );
1387+ return NULL ;
1388+ }
1389+ line_start = line_end ;
1390+ }
1391+ return PyUnicodeWriter_Finish (w );
1392+ }
1393+
12951394static expr_ty
1296- _PyPegen_decode_fstring_part (Parser * p , int is_raw , expr_ty constant , Token * token ) {
1395+ _PyPegen_decode_fstring_part (Parser * p , int is_first , int is_raw , int indent_char , Py_ssize_t dedent_count , expr_ty constant , Token * token ) {
12971396 assert (PyUnicode_CheckExact (constant -> v .Constant .value ));
12981397
12991398 const char * bstr = PyUnicode_AsUTF8 (constant -> v .Constant .value );
13001399 if (bstr == NULL ) {
13011400 return NULL ;
13021401 }
1402+ is_raw = is_raw || strchr (bstr , '\\' ) == NULL ;
13031403
1304- size_t len ;
1305- if (strcmp (bstr , "{{" ) == 0 || strcmp (bstr , "}}" ) == 0 ) {
1306- len = 1 ;
1307- } else {
1308- len = strlen (bstr );
1404+ PyObject * str = NULL ;
1405+ if (dedent_count > 0 ) {
1406+ str = _PyPegen_dedent_string_part (p , bstr , strlen (bstr ), indent_char , dedent_count ,
1407+ is_raw , is_first , constant , token );
1408+ }
1409+ else {
1410+ str = _PyPegen_decode_string (p , is_raw , bstr , strlen (bstr ), token );
13091411 }
13101412
1311- is_raw = is_raw || strchr (bstr , '\\' ) == NULL ;
1312- PyObject * str = _PyPegen_decode_string (p , is_raw , bstr , len , token );
13131413 if (str == NULL ) {
13141414 _Pypegen_raise_decode_error (p );
13151415 return NULL ;
@@ -1340,12 +1440,74 @@ _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b
13401440 return NULL ;
13411441 }
13421442 int is_raw = strpbrk (quote_str , "rR" ) != NULL ;
1443+ int is_dedent = strpbrk (quote_str , "dD" ) != NULL ;
1444+ int indent_char = 0 ;
1445+ Py_ssize_t indent_count = 0 ;
13431446
13441447 asdl_expr_seq * seq = _Py_asdl_expr_seq_new (total_items , p -> arena );
13451448 if (seq == NULL ) {
13461449 return NULL ;
13471450 }
13481451
1452+ if (is_dedent ) {
1453+ expr_ty first_item = asdl_seq_GET (raw_expressions , 0 );
1454+ if (first_item -> kind != Constant_kind
1455+ || PyUnicode_ReadChar (first_item -> v .Constant .value , 0 ) != '\n' ) {
1456+ RAISE_SYNTAX_ERROR_KNOWN_LOCATION (
1457+ first_item ,
1458+ "d-string must start with a newline"
1459+ );
1460+ return NULL ;
1461+ }
1462+
1463+ expr_ty last_item = asdl_seq_GET (raw_expressions , n_items - 1 );
1464+ if (last_item -> kind != Constant_kind ) {
1465+ RAISE_SYNTAX_ERROR_KNOWN_LOCATION (
1466+ last_item ,
1467+ "d-string must end with an indent line"
1468+ );
1469+ return NULL ;
1470+ }
1471+
1472+ Py_ssize_t blen ;
1473+ const char * bstr = PyUnicode_AsUTF8AndSize (last_item -> v .Constant .value , & blen );
1474+ if (bstr == NULL ) {
1475+ return NULL ;
1476+ }
1477+
1478+ // memrchr is GNU extension; use manual loop for portability.
1479+ const char * lastline = bstr + blen ;
1480+ while (bstr < lastline ) {
1481+ if (lastline [-1 ] == '\n' ) {
1482+ break ;
1483+ }
1484+ lastline -- ;
1485+ if (* lastline != ' ' && * lastline != '\t' ) {
1486+ RAISE_SYNTAX_ERROR_KNOWN_LOCATION (
1487+ last_item ,
1488+ "d-string must end with an indent line"
1489+ );
1490+ return NULL ;
1491+ }
1492+ }
1493+
1494+ // checks indent of the last line.
1495+ indent_count = bstr + blen - lastline ;
1496+ if (indent_count > 0 ) {
1497+ indent_char = lastline [0 ];
1498+
1499+ for (Py_ssize_t i = 1 ; i < indent_count ; i ++ ) {
1500+ if (lastline [i ] != indent_char ) {
1501+ RAISE_ERROR_KNOWN_LOCATION (
1502+ p , PyExc_TabError , last_item -> end_lineno , i , last_item -> end_lineno , i + 1 ,
1503+ "inconsistent use of tabs and spaces in indentation"
1504+ );
1505+ return NULL ;
1506+ }
1507+ }
1508+ }
1509+ }
1510+
13491511 Py_ssize_t index = 0 ;
13501512 for (Py_ssize_t i = 0 ; i < n_items ; i ++ ) {
13511513 expr_ty item = asdl_seq_GET (raw_expressions , i );
@@ -1377,7 +1539,7 @@ _get_resized_exprs(Parser *p, Token *a, asdl_expr_seq *raw_expressions, Token *b
13771539 }
13781540
13791541 if (item -> kind == Constant_kind ) {
1380- item = _PyPegen_decode_fstring_part (p , is_raw , item , b );
1542+ item = _PyPegen_decode_fstring_part (p , i == 0 , is_raw , indent_char , indent_count , item , b );
13811543 if (item == NULL ) {
13821544 return NULL ;
13831545 }
0 commit comments