@@ -81,8 +81,7 @@ def _box_pa(
81
81
cls , value , pa_type : pa .DataType | None = None
82
82
) -> pa .Array | pa .ChunkedArray | pa .Scalar :
83
83
"""Box value into a pyarrow Array, ChunkedArray or Scalar."""
84
- if pa_type is not None and pa_type != pa .string ():
85
- raise ValueError (f"Unsupported type '{ pa_type } ' for JSONArray" )
84
+ assert pa_type is None or pa_type == pa .string ()
86
85
87
86
if isinstance (value , pa .Scalar ) or not (
88
87
common .is_list_like (value ) and not common .is_dict_like (value )
@@ -93,8 +92,6 @@ def _box_pa(
93
92
@classmethod
94
93
def _box_pa_scalar (cls , value ) -> pa .Scalar :
95
94
"""Box value into a pyarrow Scalar."""
96
- if isinstance (value , pa .Scalar ):
97
- pa_scalar = value
98
95
if pd .isna (value ):
99
96
pa_scalar = pa .scalar (None , type = pa .string ())
100
97
else :
@@ -104,33 +101,21 @@ def _box_pa_scalar(cls, value) -> pa.Scalar:
104
101
return pa_scalar
105
102
106
103
@classmethod
107
- def _box_pa_array (
108
- cls , value , pa_type : pa .DataType | None = None , copy : bool = False
109
- ) -> pa .Array | pa .ChunkedArray :
104
+ def _box_pa_array (cls , value , copy : bool = False ) -> pa .Array | pa .ChunkedArray :
110
105
"""Box value into a pyarrow Array or ChunkedArray."""
111
106
if isinstance (value , cls ):
112
107
pa_array = value ._pa_array
113
- elif isinstance (value , (pa .Array , pa .ChunkedArray )):
114
- pa_array = value
115
108
else :
116
- try :
117
- value = [JSONArray ._serialize_json (x ) for x in value ]
118
- pa_array = pa .array (value , type = pa_type , from_pandas = True )
119
- except (pa .ArrowInvalid , pa .ArrowTypeError ):
120
- # https://github.com/pandas-dev/pandas/pull/50430:
121
- # let pyarrow infer type, then cast
122
- pa_array = pa .array (value , from_pandas = True )
123
-
124
- if pa_type is not None and pa_array .type != pa_type :
125
- pa_array = pa_array .cast (pa_type )
126
-
109
+ value = [JSONArray ._serialize_json (x ) for x in value ]
110
+ pa_array = pa .array (value , type = pa .string (), from_pandas = True )
127
111
return pa_array
128
112
129
113
@classmethod
130
114
def _from_sequence (cls , scalars , * , dtype = None , copy = False ):
131
115
"""Construct a new ExtensionArray from a sequence of scalars."""
132
- result = [JSONArray ._serialize_json (scalar ) for scalar in scalars ]
133
- return cls (pa .array (result , type = pa .string (), from_pandas = True ))
116
+ pa_array = cls ._box_pa (scalars )
117
+ arr = cls (pa_array )
118
+ return arr
134
119
135
120
@classmethod
136
121
def _concat_same_type (cls , to_concat ) -> JSONArray :
@@ -139,11 +124,6 @@ def _concat_same_type(cls, to_concat) -> JSONArray:
139
124
arr = pa .chunked_array (chunks , type = pa .string ())
140
125
return cls (arr )
141
126
142
- @classmethod
143
- def _from_factorized (cls , values , original ):
144
- """Reconstruct an ExtensionArray after factorization."""
145
- return cls ._from_sequence (values , dtype = original .dtype )
146
-
147
127
@staticmethod
148
128
def _serialize_json (value ):
149
129
"""A static method that converts a JSON value into a string representation."""
@@ -202,19 +182,6 @@ def __getitem__(self, item):
202
182
r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
203
183
r"(`None`) and integer or boolean arrays are valid indices"
204
184
)
205
- # We are not an array indexer, so maybe e.g. a slice or integer
206
- # indexer. We dispatch to pyarrow.
207
- if isinstance (item , slice ):
208
- # Arrow bug https://github.com/apache/arrow/issues/38768
209
- if item .start == item .stop :
210
- pass
211
- elif (
212
- item .stop is not None
213
- and item .stop < - len (self )
214
- and item .step is not None
215
- and item .step < 0
216
- ):
217
- item = slice (item .start , None , item .step )
218
185
219
186
value = self ._pa_array [item ]
220
187
if isinstance (value , pa .ChunkedArray ):
@@ -229,7 +196,8 @@ def __getitem__(self, item):
229
196
def __iter__ (self ):
230
197
"""Iterate over elements of the array."""
231
198
for value in self ._pa_array :
232
- val = JSONArray ._deserialize_json (value .as_py ())
199
+ val = value .as_py ()
200
+ # val = JSONArray._deserialize_json(value.as_py())
233
201
if val is None :
234
202
yield self ._dtype .na_value
235
203
else :
0 commit comments