Skip to content

Commit 8473874

Browse files
authored
Update prune_properties to support pruning attributes from all sequence members (#711)
1 parent 3178509 commit 8473874

File tree

4 files changed

+463
-4
lines changed

4 files changed

+463
-4
lines changed

src/rpdk/core/contract/resource_client.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,18 @@
2222
get_temporary_credentials,
2323
)
2424
from ..jsonutils.pointer import fragment_decode, fragment_list
25-
from ..jsonutils.utils import item_hash, traverse, traverse_raw_schema
25+
from ..jsonutils.utils import (
26+
UNPACK_SEQUENCE_IDENTIFIER,
27+
item_hash,
28+
traverse,
29+
traverse_path_for_sequence_members,
30+
traverse_raw_schema,
31+
)
2632

2733
LOG = logging.getLogger(__name__)
34+
LOOKUP_ERROR_MESSAGE_FORMAT = (
35+
"Caught LookupError when pruning properties for document %s and path %s"
36+
)
2837

2938

3039
def prune_properties(document, paths):
@@ -35,16 +44,40 @@ def prune_properties(document, paths):
3544
for convenience. (The return value may be ignored.)
3645
"""
3746
for path in paths:
47+
# if '*' is in path, we need to prune more than one property (prune property for all members of array)
48+
if UNPACK_SEQUENCE_IDENTIFIER in path:
49+
document = _prune_properties_for_all_sequence_members(document, path)
50+
continue
3851
try:
3952
_prop, resolved_path, parent = traverse(document, path)
4053
except LookupError:
41-
pass # not found means nothing to delete
54+
# not found means nothing to delete
55+
LOG.info(LOOKUP_ERROR_MESSAGE_FORMAT, document, path)
4256
else:
4357
key = resolved_path[-1]
4458
del parent[key]
4559
return document
4660

4761

62+
def _prune_properties_for_all_sequence_members(document: dict, path: list) -> dict:
    """Delete the property addressed by ``path`` from every sequence member.

    ``path`` is expected to contain the unpack-sequence identifier, so it may
    resolve to several concrete locations inside ``document``. Each resolved
    location is deleted in place.

    :parameter document: document to prune (mutated in place)
    :parameter path: path parts, including the unpack-sequence identifier

    :returns: the same ``document`` object, for convenience
    """
    try:
        # one wildcard path fans out into many concrete paths
        _members, matches = traverse_path_for_sequence_members(document, path)
    except LookupError:
        # nothing at that location, so there is nothing to delete
        LOG.info(LOOKUP_ERROR_MESSAGE_FORMAT, document, path)
        return document

    # Concrete paths come back with ascending indices; deleting from the end
    # keeps the not-yet-processed (lower) indices valid.
    for match in reversed(matches):
        container = document
        for step in match[:-1]:
            container = container[step]
        del container[match[-1]]
    return document
79+
80+
4881
def prune_properties_if_not_exist_in_path(output_model, input_model, paths):
4982
"""Prune given properties from a model.
5083

src/rpdk/core/jsonutils/utils.py

Lines changed: 129 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import json
33
import logging
44
from collections.abc import Mapping, Sequence
5-
from typing import Any
5+
from typing import Any, List, Tuple
66

77
from nested_lookup import nested_lookup
88
from ordered_set import OrderedSet
@@ -14,6 +14,7 @@
1414
NON_MERGABLE_KEYS = ("uniqueItems", "insertionOrder")
1515
TYPE = "type"
1616
REF = "$ref"
17+
UNPACK_SEQUENCE_IDENTIFIER = "*"
1718

1819

1920
class FlatteningError(Exception):
@@ -185,6 +186,133 @@ def traverse_raw_schema(schema: dict, path: tuple):
185186
return {}
186187

187188

189+
def traverse_path_for_sequence_members(
    document: dict, path_parts: Sequence, path: list = None
) -> Tuple[List[object], List[tuple]]:
    """Resolve a path that may fan out over every member of a sequence.

    The document is treated as the base of the reference, and the reference
    itself is not validated. Unlike ``traverse``, the result is a pair of
    lists: the documents that were reached, and the concrete path that leads
    to each of them.

    :parameter document: document to traverse (dict or list)
    :parameter path_parts: document paths to traverse
    :parameter path: traversed path so far

    :raises ValueError, LookupError: the reference is invalid for this document

    >>> traverse_path_for_sequence_members({"foo": {"bar": [42, 43, 44]}}, tuple())
    ([{'foo': {'bar': [42, 43, 44]}}], [()])
    >>> traverse_path_for_sequence_members({"foo": {"bar": [42, 43, 44]}}, ["foo"])
    ([{'bar': [42, 43, 44]}], [('foo',)])
    >>> traverse_path_for_sequence_members({"foo": {"bar": [42, 43, 44]}}, ("foo", "bar"))
    ([[42, 43, 44]], [('foo', 'bar')])
    >>> traverse_path_for_sequence_members({"foo": {"bar": [42, 43, 44]}}, ("foo", "bar", "*"))
    ([42, 43, 44], [('foo', 'bar', 0), ('foo', 'bar', 1), ('foo', 'bar', 2)])
    >>> traverse_path_for_sequence_members({"foo": {"bar": [{"baz": 1, "bin": 1}, {"baz": 2, "bin": 2}]}}, ("foo", "bar", "*"))
    ([{'baz': 1, 'bin': 1}, {'baz': 2, 'bin': 2}], [('foo', 'bar', 0), ('foo', 'bar', 1)])
    >>> traverse_path_for_sequence_members({"foo": {"bar": [{"baz": 1, "bin": 1}, {"baz": 2, "bin": 2}]}}, ("foo", "bar", "*", "baz"))
    ([1, 2], [('foo', 'bar', 0, 'baz'), ('foo', 'bar', 1, 'baz')])
    >>> traverse_path_for_sequence_members({}, ["foo"])
    Traceback (most recent call last):
    ...
    KeyError: 'foo'
    >>> traverse_path_for_sequence_members([], ["foo"])
    Traceback (most recent call last):
    ...
    ValueError: invalid literal for int() with base 10: 'foo'
    >>> traverse_path_for_sequence_members([], [0])
    Traceback (most recent call last):
    ...
    IndexError: list index out of range
    """
    traversed = [] if path is None else path

    # Nothing left to resolve: the current document is the answer.
    if not path_parts:
        return [document], [tuple(traversed)]

    # Work on a private copy because the handlers consume parts with pop().
    remaining_parts = list(path_parts)
    handler = (
        _handle_sequence_for_traverse
        if isinstance(document, Sequence)
        else _handle_non_sequence_for_traverse
    )
    return handler(document, remaining_parts, traversed)
238+
239+
240+
def _handle_non_sequence_for_traverse(
    current_document: dict, current_path_parts: list, current_path: list
) -> Tuple[List[object], List[tuple]]:
    """
    Resolve one path part against a non-sequence document, then keep going.

    This mirrors a single iteration of the loop in `traverse`:

    The first remaining path part is consumed and used as the key into the current document.
    That key is recorded on the traversed path only once the lookup has succeeded.

    Whatever is left is resolved by calling `traverse_path_for_sequence_members` again.
    """
    next_key = current_path_parts.pop(0)
    # Look up before recording, so a failed lookup leaves the traversed path untouched.
    child_document = current_document[next_key]
    current_path.append(next_key)
    return traverse_path_for_sequence_members(
        child_document, current_path_parts, current_path
    )
258+
259+
260+
def _handle_sequence_for_traverse(
    current_document: Sequence, current_path_parts: list, current_path: list
) -> Tuple[List[object], List[tuple]]:
    """
    Resolve the next path part against a sequence and continue the traversal.

    The next path part is consumed from `current_path_parts`:

    If it is the unpack sequence identifier (e.g. '*'), every member of the sequence is traversed.
    Otherwise it must be convertible to an index; the member at that index becomes the new
    document, the index is added to the traversed path, and the remaining path parts are
    resolved against that member.

    :parameter current_document: sequence being traversed
    :parameter current_path_parts: remaining path parts (consumed in place)
    :parameter current_path: traversed path so far (extended in place)

    :raises ValueError: the path part is neither the unpack identifier nor an integer
    :raises LookupError: the index (or a later path part) does not exist
    """
    sequence_part = current_path_parts.pop(0)
    if sequence_part == UNPACK_SEQUENCE_IDENTIFIER:
        return _handle_unpack_sequence_for_traverse(
            current_document, current_path_parts, current_path
        )
    # otherwise, sequence part should be a valid index
    sequence_index = int(sequence_part)
    member = current_document[sequence_index]
    current_path.append(sequence_index)
    # Keep resolving whatever is left. Returning here would silently drop the
    # remaining parts and report the whole member as the match, which makes a
    # caller that deletes matches remove far more than the path asked for.
    # With no parts left this returns ([member], [tuple(current_path)]).
    return traverse_path_for_sequence_members(
        member, current_path_parts, current_path
    )


def _handle_unpack_sequence_for_traverse(
    current_document: Sequence, current_path_parts: list, current_path: list
) -> Tuple[List[object], List[tuple]]:
    """
    When unpacking a sequence, we need to include multiple paths and multiple documents, one for each sequence member.

    For each sequence member, in increasing index order:
        The traversed path is extended with the member's index (on a fresh list per member).
        The member is traversed using the remaining path parts.
        The documents and paths reached from that member are appended to the results.

    An empty sequence yields two empty lists.

    :parameter current_document: sequence whose members are unpacked
    :parameter current_path_parts: path parts still to resolve below each member
    :parameter current_path: traversed path up to (not including) the member index
    """
    documents = []
    resolved_paths = []
    for sequence_index, member in enumerate(current_document):
        # `current_path + [...]` builds a new list, so sibling members never
        # share (and mutate) the same traversed-path object.
        member_documents, member_paths = traverse_path_for_sequence_members(
            member, current_path_parts, current_path + [sequence_index]
        )
        documents.extend(member_documents)
        resolved_paths.extend(member_paths)
    return documents, resolved_paths
314+
315+
188316
def schema_merge(target, src, path): # noqa: C901 # pylint: disable=R0912
189317
"""Merges the src schema into the target schema in place.
190318

tests/contract/test_resource_client.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,174 @@ def test_prune_properties():
345345
assert document == {"one": "two", "array": ["first"]}
346346

347347

348+
def test_prune_properties_for_all_sequence_members():
    """A wildcard path empties the sequence; paths that are not found are tolerated."""
    paths_to_prune = [
        ("foo",),  # prune foo: bar
        ("spam",),  # prune spam: eggs
        ("not_found",),  # missing members are fine
        ("not_found", "*"),  # missing sequences are fine
        ("array", "*"),  # prune members of sequence "array"
    ]
    document: dict = {
        "foo": "bar",
        "spam": "eggs",
        "one": "two",
        "array": ["first", "second"],
    }

    prune_properties(document, paths_to_prune)

    expected = {"one": "two", "array": []}
    assert document == expected
372+
373+
374+
def test_prune_properties_nested_sequence():
    """Wildcards work both for a key below each member and for a nested sequence."""
    first_member = {
        "outer1": {"inner1": "valueA", "inner2": "valueA"},
        "outer2": ["valueA", "valueB"],
    }
    second_member = {
        "outer1": {"inner1": "valueB", "inner2": "valueB"},
        "outer2": ["valueC", "valueD"],
    }
    document: dict = {"array": [first_member, second_member]}

    paths_to_prune = [
        # nothing in the document matches this one
        ("not_found", "*", "not_found", "*"),
        # drop "inner1" below every member of "array"
        ("array", "*", "outer1", "inner1"),
        # drop every member of every "outer2" sequence
        ("array", "*", "outer2", "*"),
    ]
    prune_properties(document, paths_to_prune)

    expected = {
        "array": [
            {"outer1": {"inner2": "valueA"}, "outer2": []},
            {"outer1": {"inner2": "valueB"}, "outer2": []},
        ]
    }
    assert document == expected
416+
417+
418+
def test_prune_properties_nested_sequence_2():
    """Same as the nested-sequence case, plus an index path that must leave "array2" intact."""
    first_member = {
        "array2": [{"i1": "A", "i2": "B"}, {"i1": "C", "i2": "D"}],
        "outer1": {"inner1": "valueA", "inner2": "valueA"},
        "outer2": ["valueA", "valueB"],
    }
    second_member = {
        "array2": [{"i1": "E", "i2": "F"}, {"i1": "G", "i2": "H"}],
        "outer1": {"inner1": "valueB", "inner2": "valueB"},
        "outer2": ["valueC", "valueD"],
    }
    document: dict = {"array": [first_member, second_member]}

    paths_to_prune = [
        # nothing in the document matches this one
        ("not_found", "*", "not_found", "*"),
        # drop "inner1" below every member of "array"
        ("array", "*", "outer1", "inner1"),
        # drop every member of every "outer2" sequence
        ("array", "*", "outer2", "*"),
        # no wildcard here; the expected result keeps every "array2" entry untouched
        ("array", "1", "1", "i1"),
    ]
    prune_properties(document, paths_to_prune)

    expected = {
        "array": [
            {
                "array2": [{"i1": "A", "i2": "B"}, {"i1": "C", "i2": "D"}],
                "outer1": {"inner2": "valueA"},
                "outer2": [],
            },
            {
                "array2": [{"i1": "E", "i2": "F"}, {"i1": "G", "i2": "H"}],
                "outer1": {"inner2": "valueB"},
                "outer2": [],
            },
        ]
    }
    assert document == expected
476+
477+
478+
def test_prune_properties_specific_sequence_indices():
    """Paths with explicit indices (no wildcard) prune only the addressed member."""
    first_member = {
        "outer1": {"inner1": "valueA", "inner2": "valueA"},
        "outer2": ["valueA", "valueB"],
    }
    second_member = {
        "outer1": {"inner1": "valueB", "inner2": "valueB"},
        "outer2": ["valueC", "valueD"],
    }
    document: dict = {"array": [first_member, second_member]}

    paths_to_prune = [
        # only the first member loses "inner1"
        ("array", "0", "outer1", "inner1"),
        # only the second member loses the second "outer2" entry
        ("array", "1", "outer2", "1"),
    ]
    prune_properties(document, paths_to_prune)

    expected = {
        "array": [
            {"outer1": {"inner2": "valueA"}, "outer2": ["valueA", "valueB"]},
            {"outer1": {"inner1": "valueB", "inner2": "valueB"}, "outer2": ["valueC"]},
        ]
    }
    assert document == expected
514+
515+
348516
def test_prune_properties_from_model():
349517
document = {
350518
"foo": "bar",

0 commit comments

Comments
 (0)