Skip to content

Commit 18789c4

Browse files
committed
Fix re-index of sparse array selections
1 parent 2667fde commit 18789c4

File tree

4 files changed

+176
-24
lines changed

4 files changed

+176
-24
lines changed

docs/query.md

Lines changed: 128 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ for value in it.values():
8282

8383
## Tee
8484

85-
And finally there's `tee()`, which creates multiple independent queries from one query iterator. It is not safe to use the initial `Query` instance after calling `tee()`.
85+
[`tee()`](api.md#jsonpath.Query.tee) creates multiple independent queries from one query iterator. It is not safe to use the initial `Query` instance after calling `tee()`.
8686

8787
```python
8888
from jsonpath import query
@@ -92,3 +92,130 @@ it1, it2 = query("$.some[?@.thing]", data).tee()
9292
head = it1.head(10) # first 10 matches
9393
tail = it2.tail(10) # last 10 matches
9494
```
95+
96+
## Select
97+
98+
[`select(*expressions, projection=Projection.RELATIVE)`](api.md#jsonpath.Query.select) performs JSONPath match projection, selecting a subset of values according to one or more JSONPath query expressions relative to the match location. For example:
99+
100+
```python
101+
from jsonpath import query
102+
103+
data = {
104+
"categories": [
105+
{
106+
"name": "footwear",
107+
"products": [
108+
{
109+
"title": "Trainers",
110+
"description": "Fashionable trainers.",
111+
"price": 89.99,
112+
},
113+
{
114+
"title": "Barefoot Trainers",
115+
"description": "Running trainers.",
116+
"price": 130.00,
117+
"social": {"likes": 12, "shares": 7},
118+
},
119+
],
120+
},
121+
{
122+
"name": "headwear",
123+
"products": [
124+
{
125+
"title": "Cap",
126+
"description": "Baseball cap",
127+
"price": 15.00,
128+
},
129+
{
130+
"title": "Beanie",
131+
"description": "Winter running hat.",
132+
"price": 9.00,
133+
},
134+
],
135+
},
136+
],
137+
"price_cap": 10,
138+
}
139+
140+
for product in query("$..products.*", data).select("title", "price"):
141+
print(product)
142+
```
143+
144+
Which selects just the `title` and `price` fields for each product.
145+
146+
```text
147+
{'title': 'Trainers', 'price': 89.99}
148+
{'title': 'Barefoot Trainers', 'price': 130.0}
149+
{'title': 'Cap', 'price': 15.0}
150+
{'title': 'Beanie', 'price': 9.0}
151+
```
152+
153+
Without the call to `select()`, we'd get all fields in each product object.
154+
155+
```python
156+
# ...
157+
158+
for product in query("$..products.*", data).values():
159+
print(product)
160+
```
161+
162+
```text
163+
{'title': 'Trainers', 'description': 'Fashionable trainers.', 'price': 89.99}
164+
{'title': 'Barefoot Trainers', 'description': 'Running trainers.', 'price': 130.0, 'social': {'likes': 12, 'shares': 7}}
165+
{'title': 'Cap', 'description': 'Baseball cap', 'price': 15.0}
166+
{'title': 'Beanie', 'description': 'Winter running hat.', 'price': 9.0}
167+
```
168+
169+
We can select nested values too.
170+
171+
```python
172+
# ...
173+
174+
for product in query("$..products.*", data).select("title", "social.shares"):
175+
print(product)
176+
```
177+
178+
```text
179+
{'title': 'Trainers'}
180+
{'title': 'Barefoot Trainers', 'social': {'shares': 7}}
181+
{'title': 'Cap'}
182+
{'title': 'Beanie'}
183+
```
184+
185+
And flatten the selection into a sequence of values.
186+
187+
```python
188+
from jsonpath import Projection
189+
190+
# ...
191+
192+
for product in query("$..products.*", data).select(
193+
"title", "social.shares", projection=Projection.FLAT
194+
):
195+
print(product)
196+
```
197+
198+
```text
199+
['Trainers']
200+
['Barefoot Trainers', 7]
201+
['Cap']
202+
['Beanie']
203+
```
204+
205+
Or project the selection from the JSON value root.
206+
207+
```python
208+
# ...
209+
210+
for product in query("$..products[?@.social]", data).select(
211+
"title",
212+
"social.shares",
213+
projection=Projection.ROOT,
214+
):
215+
print(product)
216+
217+
```
218+
219+
```text
220+
{'categories': [{'products': [{'title': 'Barefoot Trainers', 'social': {'shares': 7}}]}]}
221+
```

jsonpath/fluent_api.py

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ def _select(
185185
expressions: Tuple[str, ...],
186186
projection: Projection,
187187
) -> object:
188+
if isinstance(match.obj, str):
189+
return None
188190
if isinstance(match.obj, Sequence) or projection == Projection.FLAT:
189191
obj: Union[List[Any], Dict[str, Any]] = []
190192
elif isinstance(match.obj, Mapping):
@@ -197,7 +199,7 @@ def _select(
197199
for expr in expressions:
198200
self._patch(match, expr, patch, projection)
199201

200-
return patch.apply(obj)
202+
return _sparse_values(patch.apply(obj))
201203

202204
def _patch(
203205
self,
@@ -217,13 +219,11 @@ def _patch(
217219
str(p).replace("~", "~0").replace("/", "~1")
218220
for p in rel_match.parts
219221
)
220-
pointer = root_pointer / rel_pointer
221-
_patch_parents(pointer.parent(), patch, match.obj) # type: ignore
222+
pointer = _patch_parents(root_pointer / rel_pointer, patch, match.root) # type: ignore
222223
patch.addap(pointer, rel_match.obj)
223224
else:
224225
# Natural projection
225-
pointer = rel_match.pointer()
226-
_patch_parents(pointer.parent(), patch, match.obj) # type: ignore
226+
pointer = _patch_parents(rel_match.pointer(), patch, match.obj) # type: ignore
227227
patch.addap(pointer, rel_match.obj)
228228

229229
def first_one(self) -> Optional[JSONPathMatch]:
@@ -266,17 +266,40 @@ def _patch_parents(
266266
pointer: JSONPointer,
267267
patch: JSONPatch,
268268
obj: Union[Sequence[Any], Mapping[str, Any]],
269-
) -> None:
270-
if pointer.parent().parts:
271-
_patch_parents(pointer.parent(), patch, obj)
272-
273-
try:
274-
_obj = pointer.resolve(obj)
275-
except JSONPointerKeyError:
276-
_obj = obj
277-
278-
if pointer.parts:
279-
if isinstance(_obj, Sequence):
280-
patch.addne(pointer, [])
281-
elif isinstance(_obj, Mapping):
282-
patch.addne(pointer, {})
269+
) -> JSONPointer:
270+
parent = pointer.parent()
271+
if parent.parent().parts:
272+
_patch_parents(parent, patch, obj)
273+
274+
if parent.parts:
275+
try:
276+
_obj = parent.resolve(obj)
277+
except JSONPointerKeyError:
278+
_obj = obj
279+
280+
# For lack of a better solution, we're patching arrays to dictionaries with
281+
# integer keys. This is to handle sparse array selections without having to
282+
# keep track of indexes and how they map from the root JSON value to the
283+
# selected JSON value.
284+
#
285+
# We'll fix these "sparse arrays" after the patch has been applied.
286+
if isinstance(_obj, (Sequence, Mapping)) and not isinstance(_obj, str):
287+
patch.addne(parent, {})
288+
289+
return pointer
290+
291+
292+
def _sparse_values(obj: Any) -> object:
293+
"""Fix sparse arrays (dictionaries with integer keys)."""
294+
if isinstance(obj, str) or not obj:
295+
return obj
296+
297+
if isinstance(obj, Sequence):
298+
return [_sparse_values(e) for e in obj]
299+
300+
if isinstance(obj, Mapping):
301+
if isinstance(next(iter(obj)), int):
302+
return [_sparse_values(v) for v in obj.values()]
303+
return {k: _sparse_values(v) for k, v in obj.items()}
304+
305+
return obj

jsonpath/patch.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,7 @@ def apply(
112112
target = self.path.parts[-1]
113113
if isinstance(parent, MutableSequence):
114114
if obj is UNDEFINED:
115-
if target == "-":
116-
parent.append(self.value)
117-
else:
118-
raise JSONPatchError("index out of range")
115+
parent.append(self.value)
119116
else:
120117
parent.insert(int(target), self.value)
121118
elif isinstance(parent, MutableMapping) and target not in parent:
@@ -669,6 +666,7 @@ def apply(
669666
raise JSONPatchError(f"{err} ({op.name}:{i})") from err
670667
except (JSONPointerError, JSONPatchError) as err:
671668
raise JSONPatchError(f"{err} ({op.name}:{i})") from err
669+
672670
return _data
673671

674672
def asdicts(self) -> List[Dict[str, object]]:

jsonpath/pointer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""JSON Pointer. See https://datatracker.ietf.org/doc/html/rfc6901."""
2+
23
from __future__ import annotations
34

45
import codecs
@@ -326,6 +327,9 @@ def is_relative_to(self, other: JSONPointer) -> bool:
326327
def __eq__(self, other: object) -> bool:
327328
return isinstance(other, JSONPointer) and self.parts == other.parts
328329

330+
def __hash__(self) -> int:
331+
return hash(self.parts)
332+
329333
def __repr__(self) -> str:
330334
return f"JSONPointer({self._s!r})"
331335

0 commit comments

Comments
 (0)