@@ -14270,29 +14270,22 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
1427014270    return  Py_BuildValue ("(N)" , copy );
1427114271}
1427214272
14273- /* Dedent a string. 
14274-    Behaviour is expected to be an exact match of `textwrap.dedent`. 
14275-    Return a new reference on success, NULL with exception set on error. 
14276-    */ 
14277- PyAPI_FUNC (PyObject  * )
14278- _PyUnicode_Dedent (PyObject  * unicode )
14279- {
14280-     Py_ssize_t  src_len  =  0 ;
14281-     const  char  * src  =  PyUnicode_AsUTF8AndSize (unicode , & src_len );
14282-     if  (!src ) {
14283-         return  NULL ;
14284-     }
14285-     if  (src_len  <= 0 ) {
14286-         Py_INCREF (unicode );
14287-         return  unicode ;
14288-     }
14289- 
14290-     const  char  * end  =  src  +  src_len ;
14291- 
14292-     // [candidate_start, candidate_start + candidate_len) 
14273+ /* 
14274+ This function searches for the longest common leading whitespace 
14275+ of all lines in the range [src, end). 
14276+ It returns the length of the common leading whitespace and sets `output` to 
14277+ point to the beginning of the common leading whitespace if length > 0. 
14278+ */ 
14279+ static  Py_ssize_t 
14280+ search_longest_common_leading_whitespace (
14281+     const  char  *  const  src ,
14282+     const  char  *  const  end ,
14283+     const  char  *  *  output 
14284+ ) {
14285+     // [_start, _start + _len) 
1429314286    // describes the current longest common leading whitespace 
14294-     const  char  * candidate_start  =  NULL ;
14295-     Py_ssize_t  candidate_len  =  0 ;
14287+     const  char  * _start  =  NULL ;
14288+     Py_ssize_t  _len  =  0 ;
1429614289
1429714290    for  (const  char  * iter  =  src ; iter  <  end ; ++ iter ) {
1429814291        const  char  * line_start  =  iter ;
@@ -14305,8 +14298,7 @@ _PyUnicode_Dedent(PyObject *unicode)
1430514298                   in this line */ 
1430614299                if  (iter  ==  line_start ) {
1430714300                    // some line has no indent, fast exit! 
14308-                     Py_INCREF (unicode );
14309-                     return  unicode ;
14301+                     return  0 ;
1431014302                }
1431114303                leading_whitespace_end  =  iter ;
1431214304            }
@@ -14318,47 +14310,73 @@ _PyUnicode_Dedent(PyObject *unicode)
1431814310            continue ;
1431914311        }
1432014312
14321-         if  (!candidate_start ) {
14313+         if  (!_start ) {
1432214314            // update the first leading whitespace 
14323-             candidate_start  =  line_start ;
14324-             candidate_len  =  leading_whitespace_end  -  line_start ;
14325-             assert (candidate_len  >  0 );
14326-         } else  {
14315+             _start  =  line_start ;
14316+             _len  =  leading_whitespace_end  -  line_start ;
14317+             assert (_len  >  0 );
14318+         }
14319+         else  {
1432714320            /* We then compare with the current longest leading whitespace. 
1432814321
14329-                [line_start, leading_whitespace_end) is the leading whitespace of  
14330-                this line, 
14322+                [line_start, leading_whitespace_end) is the leading 
14323+                whitespace of  this line, 
1433114324
14332-                [candidate_start, candidate_start  + candidate_len)  
14333-                is the  leading whitespace of the current longest leading  
14334-                whitespace. */  
14335-             Py_ssize_t   new_candidate_len   =   0 ;
14325+                [_start, _start  + _len) is the current longest common  
14326+                leading whitespace found so far. */  
14327+             Py_ssize_t   new_len   =   0 ; 
14328+             const   char   * _iter   =   _start ,  * line_iter   =   line_start ;
1433614329
14337-             for  (const  char  * candidate_iter  =  candidate_start ,
14338-                             * line_iter  =  line_start ;
14339-                  candidate_iter  <  candidate_start  +  candidate_len  && 
14340-                  line_iter  <  leading_whitespace_end ;
14341-                  ++ candidate_iter , ++ line_iter ) {
14342-                 if  (* candidate_iter  !=  * line_iter ) {
14343-                     break ;
14344-                 }
14345-                 ++ new_candidate_len ;
14330+             while  (_iter  <  _start  +  _len  &&  line_iter  <  leading_whitespace_end 
14331+                 &&  * _iter  ==  * line_iter )
14332+             {
14333+                 ++ _iter ;
14334+                 ++ line_iter ;
14335+                 ++ new_len ;
1434614336            }
1434714337
14348-             candidate_len  =  new_candidate_len ;
14349-             if  (candidate_len  ==  0 ) {
14338+             _len  =  new_len ;
14339+             if  (_len  ==  0 ) {
1435014340                // No common things now, fast exit! 
14351-                 Py_INCREF (unicode );
14352-                 return  unicode ;
14341+                 return  0 ;
1435314342            }
1435414343        }
1435514344    }
1435614345
14357-     assert (candidate_len  >= 0 );
14358-     /* Final check for strings that contain nothing but whitespace. */ 
14359-     if  (candidate_len  ==  0 ) {
14360-         Py_INCREF (unicode );
14361-         return  unicode ;
14346+     assert (_len  >= 0 );
14347+     if  (_len  >  0 ) {
14348+         * output  =  _start ;
14349+     }
14350+     return  _len ;
14351+ }
14352+ 
14353+ /* Dedent a string. 
14354+    Behaviour is expected to be an exact match of `textwrap.dedent`. 
14355+    Return a new reference on success, NULL with exception set on error. 
14356+    */ 
14357+ PyObject  * 
14358+ _PyUnicode_Dedent (PyObject  * unicode )
14359+ {
14360+     Py_ssize_t  src_len  =  0 ;
14361+     const  char  * src  =  PyUnicode_AsUTF8AndSize (unicode , & src_len );
14362+     if  (!src ) {
14363+         return  NULL ;
14364+     }
14365+     assert (src_len  >= 0 );
14366+     if  (src_len  ==  0 ) {
14367+         return  Py_NewRef (unicode );
14368+     }
14369+ 
14370+     const  char  * const  end  =  src  +  src_len ;
14371+ 
14372+     // [whitespace_start, whitespace_start + whitespace_len) 
14373+     // describes the current longest common leading whitespace 
14374+     const  char  * whitespace_start  =  NULL ;
14375+     Py_ssize_t  whitespace_len  =  search_longest_common_leading_whitespace (
14376+         src , end , & whitespace_start );
14377+ 
14378+     if  (whitespace_len  ==  0 ) {
14379+         return  Py_NewRef (unicode );
1436214380    }
1436314381
1436414382    // now we should trigger a dedent 
@@ -14390,12 +14408,12 @@ _PyUnicode_Dedent(PyObject *unicode)
1439014408            continue ;
1439114409        }
1439214410
14393-         /* copy [new_line_start + candidate_len , iter) to buffer, then 
14411+         /* copy [new_line_start + whitespace_len , iter) to buffer, then 
1439414412            conditionally append '\n' */ 
1439514413
14396-         Py_ssize_t  new_line_len  =  iter  -  line_start  -  candidate_len ;
14414+         Py_ssize_t  new_line_len  =  iter  -  line_start  -  whitespace_len ;
1439714415        assert (new_line_len  >= 0 );
14398-         memcpy (dest_iter , line_start  +  candidate_len , new_line_len );
14416+         memcpy (dest_iter , line_start  +  whitespace_len , new_line_len );
1439914417
1440014418        dest_iter  +=  new_line_len ;
1440114419
0 commit comments