@@ -2954,8 +2954,10 @@ unicode_error_set_end_impl(PyObject *self, Py_ssize_t end)
29542954 * The 'start' can be negative or not, but when adjusting the value,
29552955 * we clip it in [0, max(0, objlen - 1)] and do not interpret it as
29562956 * a relative offset.
2957+ *
2958+ * This function always succeeds.
29572959 */
2958- static inline Py_ssize_t
2960+ static Py_ssize_t
29592961unicode_error_adjust_start (Py_ssize_t start , Py_ssize_t objlen )
29602962{
29612963 assert (objlen >= 0 );
@@ -2969,14 +2971,34 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
29692971}
29702972
29712973
2974+ /* Assert some properties of the adjusted 'start' value. */
2975+ #ifndef NDEBUG
2976+ static void
2977+ assert_adjusted_unicode_error_start (Py_ssize_t start , Py_ssize_t objlen )
2978+ {
2979+ assert (objlen >= 0 );
2980+ /* in the future, `min_start` may be something else */
2981+ Py_ssize_t min_start = 0 ;
2982+ assert (start >= min_start );
2983+ /* in the future, `max_start` may be something else */
2984+ Py_ssize_t max_start = Py_MAX (min_start , objlen - 1 );
2985+ assert (start <= max_start );
2986+ }
2987+ #else
2988+ #define assert_adjusted_unicode_error_start (...)
2989+ #endif
2990+
2991+
29722992/*
29732993 * Adjust the (exclusive) 'end' value of a UnicodeError object.
29742994 *
29752995 * The 'end' can be negative or not, but when adjusting the value,
29762996 * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] and
29772997 * do not interpret it as a relative offset.
2998+ *
2999+ * This function always succeeds.
29783000 */
2979- static inline Py_ssize_t
3001+ static Py_ssize_t
29803002unicode_error_adjust_end (Py_ssize_t end , Py_ssize_t objlen )
29813003{
29823004 assert (objlen >= 0 );
@@ -2990,6 +3012,59 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
29903012}
29913013
29923014
3015+ /* Assert some properties of the adjusted 'end' value. */
3016+ #ifndef NDEBUG
3017+ static void
3018+ assert_adjusted_unicode_error_end (Py_ssize_t end , Py_ssize_t objlen )
3019+ {
3020+ assert (objlen >= 0 );
3021+ /* in the future, `min_end` may be something else */
3022+ Py_ssize_t min_end = Py_MIN (1 , objlen );
3023+ assert (end >= min_end );
3024+ /* in the future, `max_end` may be something else */
3025+ Py_ssize_t max_end = Py_MAX (min_end , objlen );
3026+ assert (end <= max_end );
3027+ }
3028+ #else
3029+ #define assert_adjusted_unicode_error_end (...)
3030+ #endif
3031+
3032+
3033+ /*
3034+ * Adjust the length of the range described by a UnicodeError object.
3035+ *
3036+ * The 'start' and 'end' arguments must have been obtained by
3037+ * unicode_error_adjust_start() and unicode_error_adjust_end().
3038+ *
3039+ * The result is clipped in [0, objlen]. By construction, it
3040+ * will always be smaller than 'objlen' as 'start' and 'end'
3041+ * are smaller than 'objlen'.
3042+ */
3043+ static Py_ssize_t
3044+ unicode_error_adjust_len (Py_ssize_t start , Py_ssize_t end , Py_ssize_t objlen )
3045+ {
3046+ assert_adjusted_unicode_error_start (start , objlen );
3047+ assert_adjusted_unicode_error_end (end , objlen );
3048+ Py_ssize_t ranlen = end - start ;
3049+ assert (ranlen <= objlen );
3050+ return ranlen < 0 ? 0 : ranlen ;
3051+ }
3052+
3053+
3054+ /* Assert some properties of the adjusted range 'len' value. */
3055+ #ifndef NDEBUG
3056+ static void
3057+ assert_adjusted_unicode_error_len (Py_ssize_t ranlen , Py_ssize_t objlen )
3058+ {
3059+ assert (objlen >= 0 );
3060+ assert (ranlen >= 0 );
3061+ assert (ranlen <= objlen );
3062+ }
3063+ #else
3064+ #define assert_adjusted_unicode_error_len (...)
3065+ #endif
3066+
3067+
29933068/*
29943069 * Get various common parameters of a UnicodeError object.
29953070 *
@@ -3004,22 +3079,24 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
30043079 * objlen The 'object' length.
30053080 * start The clipped 'start' attribute.
30063081 * end The clipped 'end' attribute.
3082+ * slen The length of the slice described by the clipped 'start'
3083+ * and 'end' values. It always lies in [0, objlen].
30073084 *
30083085 * An output parameter can be NULL to indicate that
30093086 * the corresponding value does not need to be stored.
30103087 *
30113088 * Input parameter:
30123089 *
3013- * as_bytes If 1 , the error's 'object' attribute must be a bytes object ,
3014- * i.e. the call is for a `UnicodeDecodeError`. Otherwise, the
3015- * 'object' attribute must be a string.
3090+ * as_bytes If true , the error's 'object' attribute must be a ` bytes` ,
3091+ * i.e. 'self' is a `UnicodeDecodeError` instance . Otherwise,
3092+ * the 'object' attribute must be a string.
30163093 *
30173094 * A TypeError is raised if the 'object' type is incompatible.
30183095 */
30193096int
30203097_PyUnicodeError_GetParams (PyObject * self ,
30213098 PyObject * * obj , Py_ssize_t * objlen ,
3022- Py_ssize_t * start , Py_ssize_t * end ,
3099+ Py_ssize_t * start , Py_ssize_t * end , Py_ssize_t * slen ,
30233100 int as_bytes )
30243101{
30253102 assert (self != NULL );
@@ -3034,16 +3111,30 @@ _PyUnicodeError_GetParams(PyObject *self,
30343111 if (objlen != NULL ) {
30353112 * objlen = n ;
30363113 }
3114+
3115+ Py_ssize_t start_value = -1 ;
3116+ if (start != NULL || slen != NULL ) {
3117+ start_value = unicode_error_adjust_start (exc -> start , n );
3118+ }
30373119 if (start != NULL ) {
3038- * start = unicode_error_adjust_start (exc -> start , n );
3039- assert (* start >= 0 );
3040- assert (* start <= n );
3120+ assert_adjusted_unicode_error_start (start_value , n );
3121+ * start = start_value ;
3122+ }
3123+
3124+ Py_ssize_t end_value = -1 ;
3125+ if (end != NULL || slen != NULL ) {
3126+ end_value = unicode_error_adjust_end (exc -> end , n );
30413127 }
30423128 if (end != NULL ) {
3043- * end = unicode_error_adjust_end (exc -> end , n );
3044- assert (* end >= 0 );
3045- assert (* end <= n );
3129+ assert_adjusted_unicode_error_end (end_value , n );
3130+ * end = end_value ;
3131+ }
3132+
3133+ if (slen != NULL ) {
3134+ * slen = unicode_error_adjust_len (start_value , end_value , n );
3135+ assert_adjusted_unicode_error_len (* slen , n );
30463136 }
3137+
30473138 if (obj != NULL ) {
30483139 * obj = r ;
30493140 }
@@ -3111,7 +3202,9 @@ static inline int
31113202unicode_error_get_start_impl (PyObject * self , Py_ssize_t * start , int as_bytes )
31123203{
31133204 assert (self != NULL );
3114- return _PyUnicodeError_GetParams (self , NULL , NULL , start , NULL , as_bytes );
3205+ return _PyUnicodeError_GetParams (self , NULL , NULL ,
3206+ start , NULL , NULL ,
3207+ as_bytes );
31153208}
31163209
31173210
@@ -3177,7 +3270,9 @@ static inline int
31773270unicode_error_get_end_impl (PyObject * self , Py_ssize_t * end , int as_bytes )
31783271{
31793272 assert (self != NULL );
3180- return _PyUnicodeError_GetParams (self , NULL , NULL , NULL , end , as_bytes );
3273+ return _PyUnicodeError_GetParams (self , NULL , NULL ,
3274+ NULL , end , NULL ,
3275+ as_bytes );
31813276}
31823277
31833278
0 commit comments