@@ -2954,8 +2954,10 @@ unicode_error_set_end_impl(PyObject *self, Py_ssize_t end)
29542954 * The 'start' can be negative or not, but when adjusting the value, 
29552955 * we clip it in [0, max(0, objlen - 1)] and do not interpret it as 
29562956 * a relative offset. 
2957+  * 
2958+  * This function always succeeds. 
29572959 */ 
2958- static  inline   Py_ssize_t 
2960+ static  Py_ssize_t 
29592961unicode_error_adjust_start (Py_ssize_t  start , Py_ssize_t  objlen )
29602962{
29612963    assert (objlen  >= 0 );
@@ -2969,14 +2971,34 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
29692971}
29702972
29712973
2974+ /* Assert that 'start' was clipped by unicode_error_adjust_start(), i.e.
2975+  * that it lies in [0, max(0, objlen - 1)]; expands to nothing under NDEBUG. */
2976+ #ifdef NDEBUG
2977+ #define assert_adjusted_unicode_error_start(...)
2978+ #else
2979+ static void
2980+ assert_adjusted_unicode_error_start(Py_ssize_t start, Py_ssize_t objlen)
2981+ {
2982+     assert(objlen >= 0);
2983+     /* the bounds are kept as named locals since they may change later */
2984+     const Py_ssize_t min_start = 0;
2985+     const Py_ssize_t max_start = Py_MAX(min_start, objlen - 1);
2986+     assert(start >= min_start);
2987+     assert(start <= max_start);
2988+ }
2989+ #endif
2990+ 
2991+ 
29722992/* 
29732993 * Adjust the (exclusive) 'end' value of a UnicodeError object. 
29742994 * 
29752995 * The 'end' can be negative or not, but when adjusting the value, 
29762996 * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] and 
29772997 * do not interpret it as a relative offset. 
2998+  * 
2999+  * This function always succeeds. 
29783000 */ 
2979- static  inline   Py_ssize_t 
3001+ static  Py_ssize_t 
29803002unicode_error_adjust_end (Py_ssize_t  end , Py_ssize_t  objlen )
29813003{
29823004    assert (objlen  >= 0 );
@@ -2990,6 +3012,59 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
29903012}
29913013
29923014
3015+ /* Assert that 'end' was clipped by unicode_error_adjust_end(), i.e. that it
3016+  * lies in [min(1, objlen), max(min(1, objlen), objlen)]; no-op under NDEBUG. */
3017+ #ifdef NDEBUG
3018+ #define assert_adjusted_unicode_error_end(...)
3019+ #else
3020+ static void
3021+ assert_adjusted_unicode_error_end(Py_ssize_t end, Py_ssize_t objlen)
3022+ {
3023+     assert(objlen >= 0);
3024+     /* the bounds are kept as named locals since they may change later */
3025+     const Py_ssize_t min_end = Py_MIN(1, objlen);
3026+     const Py_ssize_t max_end = Py_MAX(min_end, objlen);
3027+     assert(end >= min_end);
3028+     assert(end <= max_end);
3029+ }
3030+ #endif
3031+ 
3032+ 
3033+ /*
3034+  * Compute the length of the range described by a UnicodeError object.
3035+  *
3036+  * Both 'start' and 'end' must already have been clipped, respectively
3037+  * by unicode_error_adjust_start() and unicode_error_adjust_end().
3038+  *
3039+  * The result lies in [0, objlen]: it is 0 whenever 'end' <= 'start',
3040+  * and it never exceeds 'objlen' since 'start' >= 0 and 'end' <= 'objlen'
3041+  * (it equals 'objlen' when 'start' is 0 and 'end' is 'objlen').
3042+  */
3043+ static Py_ssize_t
3044+ unicode_error_adjust_len(Py_ssize_t start, Py_ssize_t end, Py_ssize_t objlen)
3045+ {
3046+     assert_adjusted_unicode_error_start(start, objlen);
3047+     assert_adjusted_unicode_error_end(end, objlen);
3048+     const Py_ssize_t ranlen = end - start;
3049+     assert(ranlen <= objlen);
3050+     return Py_MAX(ranlen, 0);  /* an inverted range counts as empty */
3051+ }
3052+ 
3053+ 
3054+ /* Assert that the adjusted range length 'ranlen' lies in [0, objlen]. */
3055+ #ifdef NDEBUG
3056+ #define assert_adjusted_unicode_error_len(...)
3057+ #else
3058+ static void
3059+ assert_adjusted_unicode_error_len(Py_ssize_t ranlen, Py_ssize_t objlen)
3060+ {
3061+     assert(objlen >= 0);
3062+     assert(ranlen >= 0);
3063+     assert(ranlen <= objlen);
3064+ }
3065+ #endif
3066+ 
3067+ 
29933068/* 
29943069 * Get various common parameters of a UnicodeError object. 
29953070 * 
@@ -3004,22 +3079,24 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
30043079 *     objlen       The 'object' length. 
30053080 *     start        The clipped 'start' attribute. 
30063081 *     end          The clipped 'end' attribute. 
3082+  *     len          The length of the slice described by the clipped 'start' 
3083+  *                  and 'end' values. It always lies in [0, objlen]. 
30073084 * 
30083085 * An output parameter can be NULL to indicate that 
30093086 * the corresponding value does not need to be stored. 
30103087 * 
30113088 * Input parameter: 
30123089 * 
3013-  *     as_bytes     If 1 , the error's 'object' attribute must be a bytes object , 
3014-  *                  i.e. the call  is for  a `UnicodeDecodeError`. Otherwise, the  
3015-  *                  'object' attribute must be a string. 
3090+  *     as_bytes     If true, the error's 'object' attribute must be a `bytes`
3091+  *                  object, i.e. 'self' is a `UnicodeDecodeError` instance.
3092+  *                  Otherwise, the 'object' attribute must be a string.
30163093 * 
30173094 *                  A TypeError is raised if the 'object' type is incompatible. 
30183095 */ 
30193096int 
30203097_PyUnicodeError_GetParams (PyObject  * self ,
30213098                          PyObject  * * obj , Py_ssize_t  * objlen ,
3022-                           Py_ssize_t  * start , Py_ssize_t  * end ,
3099+                           Py_ssize_t  * start , Py_ssize_t  * end ,  Py_ssize_t   * len , 
30233100                          int  as_bytes )
30243101{
30253102    assert (self  !=  NULL );
@@ -3034,16 +3111,30 @@ _PyUnicodeError_GetParams(PyObject *self,
30343111    if  (objlen  !=  NULL ) {
30353112        * objlen  =  n ;
30363113    }
3114+ 
3115+     Py_ssize_t  start_value  =  -1 ;
3116+     if  (start  !=  NULL  ||  len  !=  NULL ) {
3117+         start_value  =  unicode_error_adjust_start (exc -> start , n );
3118+     }
30373119    if  (start  !=  NULL ) {
3038-         * start  =  unicode_error_adjust_start (exc -> start , n );
3039-         assert (* start  >= 0 );
3040-         assert (* start  <= n );
3120+         assert_adjusted_unicode_error_start (start_value , n );
3121+         * start  =  start_value ;
3122+     }
3123+ 
3124+     Py_ssize_t  end_value  =  -1 ;
3125+     if  (end  !=  NULL  ||  len  !=  NULL ) {
3126+         end_value  =  unicode_error_adjust_end (exc -> end , n );
30413127    }
30423128    if  (end  !=  NULL ) {
3043-         * end  =  unicode_error_adjust_end (exc -> end , n );
3044-         assert (* end  >= 0 );
3045-         assert (* end  <= n );
3129+         assert_adjusted_unicode_error_end (end_value , n );
3130+         * end  =  end_value ;
30463131    }
3132+ 
3133+     if  (len  !=  NULL ) {
3134+         * len  =  unicode_error_adjust_len (start_value , end_value , n );
3135+         assert_adjusted_unicode_error_len (* len , n );
3136+     }
3137+ 
30473138    if  (obj  !=  NULL ) {
30483139        * obj  =  r ;
30493140    }
@@ -3054,6 +3145,16 @@ _PyUnicodeError_GetParams(PyObject *self,
30543145}
30553146
30563147
3148+ /* Like _PyUnicodeError_GetParams() but for 'start', 'end' and 'len' only.
3149+  * Not 'inline': in C99, a plain 'inline' definition may emit no symbol. */
3150+ int
3151+ _PyUnicodeError_GetSliceParams(PyObject *self, Py_ssize_t *start,
3152+                                Py_ssize_t *end, Py_ssize_t *len, int as_bytes)
3153+ {
3154+     return _PyUnicodeError_GetParams(self, NULL, NULL, start, end, len,
3155+                                      as_bytes);
3156+ }
3157+ 
30573158// --- PyUnicodeEncodeObject: 'encoding' getters ------------------------------ 
30583159// Note: PyUnicodeTranslateError does not have an 'encoding' attribute. 
30593160
@@ -3111,7 +3212,7 @@ static inline int
31113212unicode_error_get_start_impl (PyObject  * self , Py_ssize_t  * start , int  as_bytes )
31123213{
31133214    assert (self  !=  NULL );
3114-     return  _PyUnicodeError_GetParams (self , NULL , NULL ,  start , NULL , as_bytes );
3215+     return  _PyUnicodeError_GetSliceParams (self , start , NULL , NULL , as_bytes );
31153216}
31163217
31173218
@@ -3177,7 +3278,7 @@ static inline int
31773278unicode_error_get_end_impl (PyObject  * self , Py_ssize_t  * end , int  as_bytes )
31783279{
31793280    assert (self  !=  NULL );
3180-     return  _PyUnicodeError_GetParams (self , NULL , NULL , NULL ,  end , as_bytes );
3281+     return  _PyUnicodeError_GetSliceParams (self , NULL , end , NULL , as_bytes );
31813282}
31823283
31833284
0 commit comments