@@ -14309,126 +14309,155 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
1430914309}
1431014310
1431114311/* 
14312-    Find the longest common leading whitespace among a list of lines. 
14313-    Whitespace-only lines are ignored. 
14314-    Returns the margin length (>= 0). 
14312+ This function searches the longest common leading whitespace 
14313+ of all lines in the [src, end). 
14314+ It returns the length of the common leading whitespace and sets *output* to 
14315+ point to the beginning of the common leading whitespace if length > 0. 
1431514316*/ 
1431614317static  Py_ssize_t 
14317- search_longest_common_leading_whitespace (PyObject  * lines , Py_ssize_t  nlines )
14318- {
14319-     PyObject  * smallest  =  NULL , * largest  =  NULL ;
14320-     for  (Py_ssize_t  i  =  0 ; i  <  nlines ; i ++ ) {
14321-         PyObject  * line  =  PyList_GET_ITEM (lines , i );
14322-         Py_ssize_t  linelen  =  PyUnicode_GET_LENGTH (line );
14323- 
14324-         if  (linelen  ==  0 ) {
14325-             continue ;
14326-         }
14327- 
14328-         int  kind  =  PyUnicode_KIND (line );
14329-         void  * data  =  PyUnicode_DATA (line );
14330-         int  all_ws  =  1 ;
14331-         for  (Py_ssize_t  j  =  0 ; j  <  linelen ; j ++ ) {
14332-             if  (!Py_UNICODE_ISSPACE (PyUnicode_READ (kind , data , j ))) {
14333-                 all_ws  =  0 ;
14334-                 break ;
14318+ search_longest_common_leading_whitespace (
14319+     const  char  * const  src ,
14320+     const  char  * const  end ,
14321+     const  char  * * output )
14322+ {
14323+     // [_start, _start + _len) 
14324+     // describes the current longest common leading whitespace 
14325+     const  char  * _start  =  NULL ;
14326+     Py_ssize_t  _len  =  0 ;
14327+ 
14328+     for  (const  char  * iter  =  src ; iter  <  end ; ++ iter ) {
14329+         const  char  * line_start  =  iter ;
14330+         const  char  * leading_whitespace_end  =  NULL ;
14331+ 
14332+         // scan the whole line 
14333+         while  (iter  <  end  &&  * iter  !=  '\n' ) {
14334+             if  (!leading_whitespace_end  &&  !Py_ISSPACE (Py_CHARMASK (* iter ))) {
14335+                 if  (iter  ==  line_start ) {
14336+                     // some line has no indent, fast exit! 
14337+                     return  0 ;
14338+                 }
14339+                 leading_whitespace_end  =  iter ;
1433514340            }
14341+             ++ iter ;
1433614342        }
14337-         if  (all_ws ) {
14343+ 
14344+         // if this line has all white space, skip it 
14345+         if  (!leading_whitespace_end ) {
1433814346            continue ;
1433914347        }
1434014348
14341-         if  (smallest  ==  NULL  ||  PyObject_RichCompareBool (line , smallest , Py_LT )) {
14342-             smallest  =  line ;
14349+         if  (!_start ) {
14350+             // update the first leading whitespace 
14351+             _start  =  line_start ;
14352+             _len  =  leading_whitespace_end  -  line_start ;
14353+             assert (_len  >  0 );
1434314354        }
14344-         if  (largest  ==  NULL  ||  PyObject_RichCompareBool (line , largest , Py_GT )) {
14345-             largest  =  line ;
14346-         }
14347-     }
14355+         else  {
14356+             /* We then compare with the current longest leading whitespace. 
1434814357
14349-     if  (smallest  ==  NULL  ||  largest  ==  NULL ) {
14350-         return  0 ;
14351-     }
14358+                [line_start, leading_whitespace_end) is the leading 
14359+                whitespace of this line, 
1435214360
14353-     Py_ssize_t  margin  =  0 ;
14354-     Py_ssize_t  minlen  =  Py_MIN (PyUnicode_GET_LENGTH (smallest ),
14355-                                PyUnicode_GET_LENGTH (largest ));
14356-     int  skind  =  PyUnicode_KIND (smallest );
14357-     int  lkind  =  PyUnicode_KIND (largest );
14358-     const  void  * sdata  =  PyUnicode_DATA (smallest );
14359-     const  void  * ldata  =  PyUnicode_DATA (largest );
14361+                [_start, _start + _len) is the leading whitespace of the 
14362+                current longest leading whitespace. */ 
14363+             Py_ssize_t  new_len  =  0 ;
14364+             const  char  * _iter  =  _start , * line_iter  =  line_start ;
1436014365
14361-     while  (margin  <  minlen ) {
14362-         Py_UCS4  c1  =  PyUnicode_READ (skind , sdata , margin );
14363-         Py_UCS4  c2  =  PyUnicode_READ (lkind , ldata , margin );
14364-         if  (c1  !=  c2  ||  !(c1  ==  ' '  ||  c1  ==  '\t' )) {
14365-             break ;
14366+             while  (_iter  <  _start  +  _len  &&  line_iter  <  leading_whitespace_end 
14367+                    &&  * _iter  ==  * line_iter )
14368+             {
14369+                 ++ _iter ;
14370+                 ++ line_iter ;
14371+                 ++ new_len ;
14372+             }
14373+ 
14374+             _len  =  new_len ;
14375+             if  (_len  ==  0 ) {
14376+                 // No common things now, fast exit! 
14377+                 return  0 ;
14378+             }
1436614379        }
14367-         margin ++ ;
1436814380    }
1436914381
14370-     return  margin ;
14382+     assert (_len  >= 0 );
14383+     if  (_len  >  0 ) {
14384+         * output  =  _start ;
14385+     }
14386+     return  _len ;
1437114387}
1437214388
1437314389/* Dedent a string. 
14374-    Behaviour is expected to be an exact match of ` textwrap.dedent` . 
14375-    Return a new reference on success, NULL with exception set on error. 
14390+    Behaviour is expected to be an exact match of textwrap.dedent. 
14391+    Return a new reference on success, NULL with an  exception set on error. 
1437614392   */ 
1437714393PyObject  * 
1437814394_PyUnicode_Dedent (PyObject  * unicode )
1437914395{
14380-     PyObject  * sep  =  PyUnicode_FromString ("\n" );
14381-     if  (sep  ==  NULL ) {
14396+     Py_ssize_t  src_len  =  0 ;
14397+     const  char  * src  =  PyUnicode_AsUTF8AndSize (unicode , & src_len );
14398+     if  (!src ) {
1438214399        return  NULL ;
1438314400    }
14384-     PyObject  * lines  =  PyUnicode_Split (unicode , sep , -1 );
14385-     Py_DECREF (sep );
14386-     if  (lines  ==  NULL ) {
14387-         return  NULL ;
14401+     assert (src_len  >= 0 );
14402+     if  (src_len  ==  0 ) {
14403+         return  Py_NewRef (unicode );
1438814404    }
14389-     Py_ssize_t  nlines  =  PyList_GET_SIZE (lines );
14390-     Py_ssize_t  margin  =  search_longest_common_leading_whitespace (lines , nlines );
1439114405
14392-     PyUnicodeWriter  * writer  =  PyUnicodeWriter_Create (0 );
14393-     if  (writer  ==  NULL ) {
14394-         Py_DECREF (lines );
14406+     const  char  * const  end  =  src  +  src_len ;
14407+ 
14408+     // [whitespace_start, whitespace_start + whitespace_len) 
14409+     // describes the current longest common leading whitespace 
14410+     const  char  * whitespace_start  =  NULL ;
14411+     const  Py_ssize_t  whitespace_len  =  search_longest_common_leading_whitespace (
14412+         src , end , & whitespace_start );
14413+ 
14414+     // now we should trigger a dedent 
14415+     char  * dest  =  PyMem_Malloc (src_len );
14416+     if  (!dest ) {
14417+         PyErr_NoMemory ();
1439514418        return  NULL ;
1439614419    }
14420+     char  * dest_iter  =  dest ;
1439714421
14398-     for  (Py_ssize_t   i   =   0 ;  i  <  nlines ;  i ++ ) {
14399-         PyObject   * line  =  PyList_GET_ITEM ( lines ,  i ) ;
14400-         Py_ssize_t   linelen  =  PyUnicode_GET_LENGTH ( line ) ;
14422+     for  (const   char   * iter   =   src ;  iter  <  end ;  ++ iter ) {
14423+         const   char   * line_start  =  iter ;
14424+         bool   in_leading_space  =  true ;
1440114425
14402-         int  all_ws  =  1 ;
14403-         int  kind  =  PyUnicode_KIND (line );
14404-         void  * data  =  PyUnicode_DATA (line );
14405-         for  (Py_ssize_t  j  =  0 ; j  <  linelen ; j ++ ) {
14406-             if  (!Py_UNICODE_ISSPACE (PyUnicode_READ (kind , data , j ))) {
14407-                 all_ws  =  0 ;
14408-                 break ;
14426+         // iterate over a line to find the end of a line 
14427+         while  (iter  <  end  &&  * iter  !=  '\n' ) {
14428+             if  (in_leading_space  &&  !Py_ISSPACE (Py_CHARMASK (* iter ))) {
14429+                 in_leading_space  =  false;
1440914430            }
14431+             ++ iter ;
1441014432        }
1441114433
14412-         if  (!all_ws ) {
14413-             Py_ssize_t  start  =  Py_MIN (margin , linelen );
14414-             if  (PyUnicodeWriter_WriteSubstring (writer , line , start , linelen ) <  0 ) {
14415-                 PyUnicodeWriter_Discard (writer );
14416-                 Py_DECREF (lines );
14417-                 return  NULL ;
14434+         // invariant: *iter == '\n' or iter == end 
14435+         const  bool  append_newline  =  iter  <  end ;
14436+ 
14437+         // if this line has all white space, write '\n' and continue 
14438+         if  (in_leading_space ) {
14439+             if  (append_newline ) {
14440+                 * dest_iter ++  =  '\n' ;
1441814441            }
14442+             continue ;
1441914443        }
1442014444
14421-         if  (i  <  nlines  -  1 ) {
14422-             if  (PyUnicodeWriter_WriteChar (writer , '\n' ) <  0 ) {
14423-                 PyUnicodeWriter_Discard (writer );
14424-                 Py_DECREF (lines );
14425-                 return  NULL ;
14426-             }
14445+         /* copy [new_line_start + whitespace_len, iter) to buffer, then 
14446+             conditionally append '\n' */ 
14447+         const  Py_ssize_t  new_line_len  =  iter  -  line_start  -  whitespace_len ;
14448+         assert (new_line_len  >= 0 );
14449+         memcpy (dest_iter , line_start  +  whitespace_len , new_line_len );
14450+ 
14451+         dest_iter  +=  new_line_len ;
14452+ 
14453+         if  (append_newline ) {
14454+             * dest_iter ++  =  '\n' ;
1442714455        }
1442814456    }
1442914457
14430-     Py_DECREF (lines );
14431-     return  PyUnicodeWriter_Finish (writer );
14458+     PyObject  * res  =  PyUnicode_FromStringAndSize (dest , dest_iter  -  dest );
14459+     PyMem_Free (dest );
14460+     return  res ;
1443214461}
1443314462
1443414463static  PyMethodDef  unicode_methods [] =  {
0 commit comments