@@ -972,27 +972,30 @@ def convert(arr):
972972 # i.e. maybe_convert_objects didn't convert
973973 arr = maybe_infer_to_datetimelike (arr )
974974 if dtype_backend != "numpy" and arr .dtype == np .dtype ("O" ):
975+ # Addressing (#59242)
976+ # Byte data that could not be decoded into
977+ # a string would throw a UnicodeDecodeError exception
978+
979+ # Try and greedily convert to string
975980 if dtype_backend == "pyarrow" :
976981 pa = import_optional_dependency ("pyarrow" )
977- # Addressing (#59242)
978- # Byte data that could not be decoded into
979- # a string would throw a UnicodeDecodeError exception
980-
981- # Try and greedily convert to pyarrow string
982- # Will fail if the object is bytes:
983- # in this case convert to pyarrow binary
984982 try :
985983 str_dtype = ArrowDtype (pa .string ())
986984 str_cls = str_dtype .construct_array_type ()
987985 arr = str_cls ._from_sequence (arr , dtype = str_dtype )
988986 except pa .lib .ArrowInvalid :
987+ # in this case convert to pyarrow binary
989988 bin_dtype = ArrowDtype (pa .binary ())
990989 bin_cls = bin_dtype .construct_array_type ()
991990 arr = bin_cls ._from_sequence (arr , dtype = bin_dtype )
992991 else :
993- new_dtype = StringDtype ()
994- arr_cls = new_dtype .construct_array_type ()
995- arr = arr_cls ._from_sequence (arr , dtype = new_dtype )
992+ try :
993+ new_dtype = StringDtype ()
994+ arr_cls = new_dtype .construct_array_type ()
995+ arr = arr_cls ._from_sequence (arr , dtype = new_dtype )
996+ except UnicodeDecodeError :
997+ # in this case do nothing
998+ pass
996999
9971000 elif dtype_backend != "numpy" and isinstance (arr , np .ndarray ):
9981001 if arr .dtype .kind in "iufb" :
0 commit comments