Skip to content

Commit 7add792

Browse files
committed
fix test cover
1 parent 17f560e commit 7add792

File tree

2 files changed

+31
-44
lines changed

2 files changed

+31
-44
lines changed

db_dtypes/json.py

Lines changed: 9 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,7 @@ def _box_pa(
8181
cls, value, pa_type: pa.DataType | None = None
8282
) -> pa.Array | pa.ChunkedArray | pa.Scalar:
8383
"""Box value into a pyarrow Array, ChunkedArray or Scalar."""
84-
if pa_type is not None and pa_type != pa.string():
85-
raise ValueError(f"Unsupported type '{pa_type}' for JSONArray")
84+
assert pa_type is None or pa_type == pa.string()
8685

8786
if isinstance(value, pa.Scalar) or not (
8887
common.is_list_like(value) and not common.is_dict_like(value)
@@ -93,8 +92,6 @@ def _box_pa(
9392
@classmethod
9493
def _box_pa_scalar(cls, value) -> pa.Scalar:
9594
"""Box value into a pyarrow Scalar."""
96-
if isinstance(value, pa.Scalar):
97-
pa_scalar = value
9895
if pd.isna(value):
9996
pa_scalar = pa.scalar(None, type=pa.string())
10097
else:
@@ -104,33 +101,21 @@ def _box_pa_scalar(cls, value) -> pa.Scalar:
104101
return pa_scalar
105102

106103
@classmethod
107-
def _box_pa_array(
108-
cls, value, pa_type: pa.DataType | None = None, copy: bool = False
109-
) -> pa.Array | pa.ChunkedArray:
104+
def _box_pa_array(cls, value, copy: bool = False) -> pa.Array | pa.ChunkedArray:
110105
"""Box value into a pyarrow Array or ChunkedArray."""
111106
if isinstance(value, cls):
112107
pa_array = value._pa_array
113-
elif isinstance(value, (pa.Array, pa.ChunkedArray)):
114-
pa_array = value
115108
else:
116-
try:
117-
value = [JSONArray._serialize_json(x) for x in value]
118-
pa_array = pa.array(value, type=pa_type, from_pandas=True)
119-
except (pa.ArrowInvalid, pa.ArrowTypeError):
120-
# https://github.com/pandas-dev/pandas/pull/50430:
121-
# let pyarrow infer type, then cast
122-
pa_array = pa.array(value, from_pandas=True)
123-
124-
if pa_type is not None and pa_array.type != pa_type:
125-
pa_array = pa_array.cast(pa_type)
126-
109+
value = [JSONArray._serialize_json(x) for x in value]
110+
pa_array = pa.array(value, type=pa.string(), from_pandas=True)
127111
return pa_array
128112

129113
@classmethod
130114
def _from_sequence(cls, scalars, *, dtype=None, copy=False):
131115
"""Construct a new ExtensionArray from a sequence of scalars."""
132-
result = [JSONArray._serialize_json(scalar) for scalar in scalars]
133-
return cls(pa.array(result, type=pa.string(), from_pandas=True))
116+
pa_array = cls._box_pa(scalars)
117+
arr = cls(pa_array)
118+
return arr
134119

135120
@classmethod
136121
def _concat_same_type(cls, to_concat) -> JSONArray:
@@ -139,11 +124,6 @@ def _concat_same_type(cls, to_concat) -> JSONArray:
139124
arr = pa.chunked_array(chunks, type=pa.string())
140125
return cls(arr)
141126

142-
@classmethod
143-
def _from_factorized(cls, values, original):
144-
"""Reconstruct an ExtensionArray after factorization."""
145-
return cls._from_sequence(values, dtype=original.dtype)
146-
147127
@staticmethod
148128
def _serialize_json(value):
149129
"""A static method that converts a JSON value into a string representation."""
@@ -202,19 +182,6 @@ def __getitem__(self, item):
202182
r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
203183
r"(`None`) and integer or boolean arrays are valid indices"
204184
)
205-
# We are not an array indexer, so maybe e.g. a slice or integer
206-
# indexer. We dispatch to pyarrow.
207-
if isinstance(item, slice):
208-
# Arrow bug https://github.com/apache/arrow/issues/38768
209-
if item.start == item.stop:
210-
pass
211-
elif (
212-
item.stop is not None
213-
and item.stop < -len(self)
214-
and item.step is not None
215-
and item.step < 0
216-
):
217-
item = slice(item.start, None, item.step)
218185

219186
value = self._pa_array[item]
220187
if isinstance(value, pa.ChunkedArray):
@@ -229,7 +196,8 @@ def __getitem__(self, item):
229196
def __iter__(self):
230197
"""Iterate over elements of the array."""
231198
for value in self._pa_array:
232-
val = JSONArray._deserialize_json(value.as_py())
199+
val = value.as_py()
200+
# val = JSONArray._deserialize_json(value.as_py())
233201
if val is None:
234202
yield self._dtype.na_value
235203
else:

tests/unit/test_json.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515

1616
import json
1717

18+
import numpy as np
1819
import pandas as pd
19-
import pandas.testing
2020
import pytest
2121

2222
import db_dtypes
@@ -45,7 +45,12 @@
4545
}
4646

4747

48-
def test_get_items():
48+
def test_construct_w_unspported_types():
49+
with pytest.raises(ValueError):
50+
db_dtypes.JSONArray(100)
51+
52+
53+
def test_getitems_return_json_objects():
4954
data = db_dtypes.JSONArray._from_sequence(JSON_DATA.values())
5055
for id, key in enumerate(JSON_DATA.keys()):
5156
if key == "null":
@@ -54,7 +59,7 @@ def test_get_items():
5459
assert data[id] == JSON_DATA[key]
5560

5661

57-
def test_get_items_unbox_object():
62+
def test_getitems_w_unboxed_dict():
5863
data = db_dtypes.JSONArray._from_sequence([JSON_DATA["dict"]])
5964
assert len(data[0]) == 2
6065

@@ -67,6 +72,20 @@ def test_get_items_unbox_object():
6772
data[0]["unknown"]
6873

6974

75+
def test_getitems_w_invalid_numpy_array():
76+
data = db_dtypes.JSONArray._from_sequence(JSON_DATA.values())
77+
idx = np.array(["str"])
78+
with pytest.raises(IndexError):
79+
data[idx]
80+
81+
82+
def test_getitems_when_iter_with_null():
83+
data = db_dtypes.JSONArray._from_sequence([JSON_DATA["null"]])
84+
s = pd.Series(data)
85+
result = s[:1].item()
86+
assert pd.isna(result)
87+
88+
7089
def test_to_numpy():
7190
s = pd.Series(db_dtypes.JSONArray._from_sequence(JSON_DATA.values()))
7291
data = s.to_numpy()

0 commit comments

Comments
 (0)