@@ -3170,12 +3170,28 @@ def read_index_node(
3170
3170
** kwargs ,
3171
3171
)
3172
3172
else :
3173
- index = factory (
3174
- _unconvert_index (
3175
- data , kind , encoding = self .encoding , errors = self .errors
3176
- ),
3177
- ** kwargs ,
3178
- )
3173
+ try :
3174
+ index = factory (
3175
+ _unconvert_index (
3176
+ data , kind , encoding = self .encoding , errors = self .errors
3177
+ ),
3178
+ ** kwargs ,
3179
+ )
3180
+ except UnicodeEncodeError as err :
3181
+ if (
3182
+ self .errors == "surrogatepass"
3183
+ and get_option ("future.infer_string" )
3184
+ and str (err ).endswith ("surrogates not allowed" )
3185
+ ):
3186
+ index = factory (
3187
+ _unconvert_index (
3188
+ data , kind , encoding = self .encoding , errors = self .errors
3189
+ ),
3190
+ dtype = "object" ,
3191
+ ** kwargs ,
3192
+ )
3193
+ else :
3194
+ raise
3179
3195
3180
3196
index .name = name
3181
3197
@@ -3311,13 +3327,19 @@ def read(
3311
3327
self .validate_read (columns , where )
3312
3328
index = self .read_index ("index" , start = start , stop = stop )
3313
3329
values = self .read_array ("values" , start = start , stop = stop )
3314
- result = Series (values , index = index , name = self .name , copy = False )
3315
- if (
3316
- using_string_dtype ()
3317
- and isinstance (values , np .ndarray )
3318
- and is_string_array (values , skipna = True )
3319
- ):
3320
- result = result .astype (StringDtype (na_value = np .nan ))
3330
+ try :
3331
+ result = Series (values , index = index , name = self .name , copy = False )
3332
+ except UnicodeEncodeError as err :
3333
+ if (
3334
+ self .errors == "surrogatepass"
3335
+ and using_string_dtype ()
3336
+ and str (err ).endswith ("surrogates not allowed" )
3337
+ ):
3338
+ result = Series (
3339
+ values , index = index , name = self .name , copy = False , dtype = "object"
3340
+ )
3341
+ else :
3342
+ raise
3321
3343
return result
3322
3344
3323
3345
def write (self , obj , ** kwargs ) -> None :
@@ -5224,7 +5246,7 @@ def _convert_string_array(data: np.ndarray, encoding: str, errors: str) -> np.nd
5224
5246
# encode if needed
5225
5247
if len (data ):
5226
5248
data = (
5227
- Series (data .ravel (), copy = False )
5249
+ Series (data .ravel (), copy = False , dtype = "object" )
5228
5250
.str .encode (encoding , errors )
5229
5251
._values .reshape (data .shape )
5230
5252
)
@@ -5264,7 +5286,9 @@ def _unconvert_string_array(
5264
5286
dtype = f"U{ itemsize } "
5265
5287
5266
5288
if isinstance (data [0 ], bytes ):
5267
- ser = Series (data , copy = False ).str .decode (encoding , errors = errors )
5289
+ ser = Series (data , copy = False ).str .decode (
5290
+ encoding , errors = errors , dtype = "object"
5291
+ )
5268
5292
data = ser .to_numpy ()
5269
5293
data .flags .writeable = True
5270
5294
else :
0 commit comments