@@ -14309,83 +14309,65 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
1430914309}
1431014310
1431114311/* 
14312- This function searchs the longest common leading whitespace 
14313- of all lines in the [src, end). 
14314- It returns the length of the common leading whitespace and sets `output` to 
14315- point to the beginning of the common leading whitespace if length > 0. 
14312+    Find the longest common leading whitespace among a list of lines. 
14313+    Whitespace-only lines are ignored. 
14314+    Returns the margin length (>= 0). 
1431614315*/ 
1431714316static  Py_ssize_t 
14318- search_longest_common_leading_whitespace (
14319-     const  char  * const  src ,
14320-     const  char  * const  end ,
14321-     const  char  * * output )
14322- {
14323-     // [_start, _start + _len) 
14324-     // describes the current longest common leading whitespace 
14325-     const  char  * _start  =  NULL ;
14326-     Py_ssize_t  _len  =  0 ;
14327- 
14328-     for  (const  char  * iter  =  src ; iter  <  end ; ++ iter ) {
14329-         const  char  * line_start  =  iter ;
14330-         const  char  * leading_whitespace_end  =  NULL ;
14331- 
14332-         // scan the whole line 
14333-         while  (iter  <  end  &&  * iter  !=  '\n' ) {
14334-             if  (!leading_whitespace_end  &&  * iter  !=  ' '  &&  * iter  !=  '\t' ) {
14335-                 /* `iter` points to the first non-whitespace character 
14336-                    in this line */ 
14337-                 if  (iter  ==  line_start ) {
14338-                     // some line has no indent, fast exit! 
14339-                     return  0 ;
14340-                 }
14341-                 leading_whitespace_end  =  iter ;
14342-             }
14343-             ++ iter ;
14344-         }
14317+ search_longest_common_leading_whitespace (PyObject  * lines , Py_ssize_t  nlines )
14318+ {
14319+     PyObject  * smallest  =  NULL , * largest  =  NULL ;
14320+     for  (Py_ssize_t  i  =  0 ; i  <  nlines ; i ++ ) {
14321+         PyObject  * line  =  PyList_GET_ITEM (lines , i );
14322+         Py_ssize_t  linelen  =  PyUnicode_GET_LENGTH (line );
1434514323
14346-         // if this line has all white space, skip it 
14347-         if  (!leading_whitespace_end ) {
14324+         if  (linelen  ==  0 ) {
1434814325            continue ;
1434914326        }
1435014327
14351-         if  (!_start ) {
14352-             // update the first leading whitespace 
14353-             _start  =  line_start ;
14354-             _len  =  leading_whitespace_end  -  line_start ;
14355-             assert (_len  >  0 );
14328+         int  kind  =  PyUnicode_KIND (line );
14329+         void  * data  =  PyUnicode_DATA (line );
14330+         int  all_ws  =  1 ;
14331+         for  (Py_ssize_t  j  =  0 ; j  <  linelen ; j ++ ) {
14332+             if  (!Py_UNICODE_ISSPACE (PyUnicode_READ (kind , data , j ))) {
14333+                 all_ws  =  0 ;
14334+                 break ;
14335+             }
14336+         }
14337+         if  (all_ws ) {
14338+             continue ;
1435614339        }
14357-         else  {
14358-             /* We then compare with the current longest leading whitespace. 
1435914340
14360-                [line_start, leading_whitespace_end) is the leading 
14361-                whitespace of this line, 
14341+         if  (smallest  ==  NULL  ||  PyObject_RichCompareBool (line , smallest , Py_LT )) {
14342+             smallest  =  line ;
14343+         }
14344+         if  (largest  ==  NULL  ||  PyObject_RichCompareBool (line , largest , Py_GT )) {
14345+             largest  =  line ;
14346+         }
14347+     }
1436214348
14363-                [_start, _start + _len) is the leading whitespace of the 
14364-                current longest leading whitespace. */ 
14365-             Py_ssize_t  new_len  =  0 ;
14366-             const  char  * _iter  =  _start , * line_iter  =  line_start ;
14349+     if  (smallest  ==  NULL  ||  largest  ==  NULL ) {
14350+         return  0 ;
14351+     }
1436714352
14368-              while  ( _iter   <   _start   +   _len   &&   line_iter   <   leading_whitespace_end 
14369-                     &&   * _iter   ==   * line_iter ) 
14370-             { 
14371-                  ++ _iter ;
14372-                  ++ line_iter ;
14373-                  ++ new_len ;
14374-             } 
14353+     Py_ssize_t   margin   =   0 ; 
14354+     Py_ssize_t   minlen   =   Py_MIN ( PyUnicode_GET_LENGTH ( smallest ), 
14355+                                 PyUnicode_GET_LENGTH ( largest )); 
14356+     int   skind   =   PyUnicode_KIND ( smallest ) ;
14357+     int   lkind   =   PyUnicode_KIND ( largest ) ;
14358+     const   void   * sdata   =   PyUnicode_DATA ( smallest ) ;
14359+     const   void   * ldata   =   PyUnicode_DATA ( largest ); 
1437514360
14376-              _len   =   new_len ; 
14377-              if  ( _len   ==   0 ) { 
14378-                  // No common things now, fast exit! 
14379-                  return   0 ; 
14380-             } 
14361+     while  ( margin   <   minlen ) { 
14362+         Py_UCS4   c1   =   PyUnicode_READ ( skind ,  sdata ,  margin ); 
14363+         Py_UCS4   c2   =   PyUnicode_READ ( lkind ,  ldata ,  margin ); 
14364+         if  ( c1   !=   c2   ||  !( c1   ==   ' '   ||   c1   ==   '\t' )) { 
14365+             break ; 
1438114366        }
14367+         margin ++ ;
1438214368    }
1438314369
14384-     assert (_len  >= 0 );
14385-     if  (_len  >  0 ) {
14386-         * output  =  _start ;
14387-     }
14388-     return  _len ;
14370+     return  margin ;
1438914371}
1439014372
1439114373/* Dedent a string. 
@@ -14395,74 +14377,58 @@ search_longest_common_leading_whitespace(
1439514377PyObject  * 
1439614378_PyUnicode_Dedent (PyObject  * unicode )
1439714379{
14398-     Py_ssize_t  src_len  =  0 ;
14399-     const  char  * src  =  PyUnicode_AsUTF8AndSize (unicode , & src_len );
14400-     if  (!src ) {
14380+     PyObject  * sep  =  PyUnicode_FromString ("\n" );
14381+     if  (sep  ==  NULL ) {
1440114382        return  NULL ;
1440214383    }
14403-     assert (src_len  >= 0 );
14404-     if  (src_len  ==  0 ) {
14405-         return  Py_NewRef (unicode );
14406-     }
14407- 
14408-     const  char  * const  end  =  src  +  src_len ;
14409- 
14410-     // [whitespace_start, whitespace_start + whitespace_len) 
14411-     // describes the current longest common leading whitespace 
14412-     const  char  * whitespace_start  =  NULL ;
14413-     Py_ssize_t  whitespace_len  =  search_longest_common_leading_whitespace (
14414-         src , end , & whitespace_start );
14415- 
14416-     if  (whitespace_len  ==  0 ) {
14417-         return  Py_NewRef (unicode );
14384+     PyObject  * lines  =  PyUnicode_Split (unicode , sep , -1 );
14385+     Py_DECREF (sep );
14386+     if  (lines  ==  NULL ) {
14387+         return  NULL ;
1441814388    }
14389+     Py_ssize_t  nlines  =  PyList_GET_SIZE (lines );
14390+     Py_ssize_t  margin  =  search_longest_common_leading_whitespace (lines , nlines );
1441914391
14420-     // now we should trigger a dedent 
14421-     char  * dest  =  PyMem_Malloc (src_len );
14422-     if  (!dest ) {
14423-         PyErr_NoMemory ();
14392+     PyUnicodeWriter  * writer  =  PyUnicodeWriter_Create (0 );
14393+     if  (writer  ==  NULL ) {
14394+         Py_DECREF (lines );
1442414395        return  NULL ;
1442514396    }
14426-     char  * dest_iter  =  dest ;
1442714397
14428-     for  (const   char   * iter   =   src ;  iter  <  end ;  ++ iter ) {
14429-         const   char   * line_start  =  iter ;
14430-         bool   in_leading_space  =  true ;
14398+     for  (Py_ssize_t   i   =   0 ;  i  <  nlines ;  i ++ ) {
14399+         PyObject   * line  =  PyList_GET_ITEM ( lines ,  i ) ;
14400+         Py_ssize_t   linelen  =  PyUnicode_GET_LENGTH ( line ) ;
1443114401
14432-         // iterate over a line to find the end of a line 
14433-         while  (iter  <  end  &&  * iter  !=  '\n' ) {
14434-             if  (in_leading_space  &&  * iter  !=  ' '  &&  * iter  !=  '\t' ) {
14435-                 in_leading_space  =  false;
14402+         int  all_ws  =  1 ;
14403+         int  kind  =  PyUnicode_KIND (line );
14404+         void  * data  =  PyUnicode_DATA (line );
14405+         for  (Py_ssize_t  j  =  0 ; j  <  linelen ; j ++ ) {
14406+             if  (!Py_UNICODE_ISSPACE (PyUnicode_READ (kind , data , j ))) {
14407+                 all_ws  =  0 ;
14408+                 break ;
1443614409            }
14437-             ++ iter ;
1443814410        }
1443914411
14440-         // invariant: *iter == '\n' or iter == end 
14441-         bool   append_newline   =   iter   <   end ;
14442- 
14443-         // if this line has all white space, write '\n' and continue 
14444-         if  ( in_leading_space   &&   append_newline ) { 
14445-             * dest_iter ++   =   '\n' ;
14446-             continue ; 
14412+         if  (! all_ws ) { 
14413+              Py_ssize_t   start   =   Py_MIN ( margin ,  linelen ) ;
14414+              if  ( PyUnicodeWriter_WriteSubstring ( writer ,  line ,  start ,  linelen )  <   0 ) { 
14415+                  PyUnicodeWriter_Discard ( writer ); 
14416+                  Py_DECREF ( lines ); 
14417+                  return   NULL ;
14418+             } 
1444714419        }
1444814420
14449-         /* copy [new_line_start + whitespace_len, iter) to buffer, then 
14450-             conditionally append '\n' */ 
14451- 
14452-         Py_ssize_t  new_line_len  =  iter  -  line_start  -  whitespace_len ;
14453-         assert (new_line_len  >= 0 );
14454-         memcpy (dest_iter , line_start  +  whitespace_len , new_line_len );
14455- 
14456-         dest_iter  +=  new_line_len ;
14457- 
14458-         if  (append_newline ) {
14459-             * dest_iter ++  =  '\n' ;
14421+         if  (i  <  nlines  -  1 ) {
14422+             if  (PyUnicodeWriter_WriteChar (writer , '\n' ) <  0 ) {
14423+                 PyUnicodeWriter_Discard (writer );
14424+                 Py_DECREF (lines );
14425+                 return  NULL ;
14426+             }
1446014427        }
1446114428    }
1446214429
14463-     PyObject  * res  =  PyUnicode_FromStringAndSize (dest , dest_iter  -  dest );
14464-     PyMem_Free (dest );
14465-     return  res ;
14430+     Py_DECREF (lines );
14431+     return  PyUnicodeWriter_Finish (writer );
1446614432}
1446714433
1446814434static  PyMethodDef  unicode_methods [] =  {
0 commit comments