Update prune_properties to support pruning attributes from all sequence members (#711)

privetjr · web-flow · commit 8473874f3cdc · 2021-09-10T15:53:56.000-07:00
diff --git a/src/rpdk/core/contract/resource_client.py b/src/rpdk/core/contract/resource_client.py
@@ -22,9 +22,18 @@
     get_temporary_credentials,
 )
 from ..jsonutils.pointer import fragment_decode, fragment_list
-from ..jsonutils.utils import item_hash, traverse, traverse_raw_schema
+from ..jsonutils.utils import (
+    UNPACK_SEQUENCE_IDENTIFIER,
+    item_hash,
+    traverse,
+    traverse_path_for_sequence_members,
+    traverse_raw_schema,
+)
 
 LOG = logging.getLogger(__name__)
+LOOKUP_ERROR_MESSAGE_FORMAT = (
+    "Caught LookupError when pruning properties for document %s and path %s"
+)
 
 
 def prune_properties(document, paths):
@@ -35,16 +44,40 @@ def prune_properties(document, paths):
     for convenience. (The return value may be ignored.)
     """
     for path in paths:
+        # a '*' in the path stands for every member of a sequence, so more than one property may need pruning
+        if UNPACK_SEQUENCE_IDENTIFIER in path:
+            document = _prune_properties_for_all_sequence_members(document, path)
+            continue
         try:
             _prop, resolved_path, parent = traverse(document, path)
         except LookupError:
-            pass  # not found means nothing to delete
+            # not found means nothing to delete
+            LOG.info(LOOKUP_ERROR_MESSAGE_FORMAT, document, path)
         else:
             key = resolved_path[-1]
             del parent[key]
     return document
 
 
+def _prune_properties_for_all_sequence_members(document: dict, path: list) -> dict:
+    try:
+        # this returns multiple paths
+        _prop, resolved_paths = traverse_path_for_sequence_members(document, path)
+    except LookupError:
+        # not found means nothing to delete
+        LOG.info(LOOKUP_ERROR_MESSAGE_FORMAT, document, path)
+    else:
+        # paths are gathered by increasing index; prune in reverse so deleting a member does not shift the ones still pending
+        resolved_paths = resolved_paths[::-1]
+        for resolved_path in resolved_paths:
+            new_doc = document
+            for key in resolved_path[: len(resolved_path) - 1]:
+                new_doc = new_doc[key]
+            key = resolved_path[-1]
+            del new_doc[key]
+    return document
+
+
 def prune_properties_if_not_exist_in_path(output_model, input_model, paths):
     """Prune given properties from a model.
 
diff --git a/src/rpdk/core/jsonutils/utils.py b/src/rpdk/core/jsonutils/utils.py
@@ -2,7 +2,7 @@
 import json
 import logging
 from collections.abc import Mapping, Sequence
-from typing import Any
+from typing import Any, List, Tuple
 
 from nested_lookup import nested_lookup
 from ordered_set import OrderedSet
@@ -14,6 +14,7 @@
 NON_MERGABLE_KEYS = ("uniqueItems", "insertionOrder")
 TYPE = "type"
 REF = "$ref"
+UNPACK_SEQUENCE_IDENTIFIER = "*"
 
 
 class FlatteningError(Exception):
@@ -185,6 +186,133 @@ def traverse_raw_schema(schema: dict, path: tuple):
         return {}
 
 
+def traverse_path_for_sequence_members(
+    document: dict, path_parts: Sequence, path: list = None
+) -> Tuple[List[object], List[tuple]]:
+    """Traverse the paths for all sequence members in the document according to the reference.
+
+    Since the document is presumed to be the reference's base, the base is
+    discarded. There is no validation that the reference is valid.
+
+    Unlike `traverse`, this returns a list of documents and a list of resolved paths.
+
+    :parameter document: document to traverse (dict or list)
+    :parameter path_parts: remaining path parts to traverse; "*" selects every member of a sequence
+    :parameter path: path traversed so far (defaults to an empty path)
+
+    :raises ValueError, LookupError: the reference is invalid for this document
+
+    >>> traverse_path_for_sequence_members({"foo": {"bar": [42, 43, 44]}}, tuple())
+    ([{'foo': {'bar': [42, 43, 44]}}], [()])
+    >>> traverse_path_for_sequence_members({"foo": {"bar": [42, 43, 44]}}, ["foo"])
+    ([{'bar': [42, 43, 44]}], [('foo',)])
+    >>> traverse_path_for_sequence_members({"foo": {"bar": [42, 43, 44]}}, ("foo", "bar"))
+    ([[42, 43, 44]], [('foo', 'bar')])
+    >>> traverse_path_for_sequence_members({"foo": {"bar": [42, 43, 44]}}, ("foo", "bar", "*"))
+    ([42, 43, 44], [('foo', 'bar', 0), ('foo', 'bar', 1), ('foo', 'bar', 2)])
+    >>> traverse_path_for_sequence_members({"foo": {"bar": [{"baz": 1, "bin": 1}, {"baz": 2, "bin": 2}]}}, ("foo", "bar", "*"))
+    ([{'baz': 1, 'bin': 1}, {'baz': 2, 'bin': 2}], [('foo', 'bar', 0), ('foo', 'bar', 1)])
+    >>> traverse_path_for_sequence_members({"foo": {"bar": [{"baz": 1, "bin": 1}, {"baz": 2, "bin": 2}]}}, ("foo", "bar", "*", "baz"))
+    ([1, 2], [('foo', 'bar', 0, 'baz'), ('foo', 'bar', 1, 'baz')])
+    >>> traverse_path_for_sequence_members({}, ["foo"])
+    Traceback (most recent call last):
+    ...
+    KeyError: 'foo'
+    >>> traverse_path_for_sequence_members([], ["foo"])
+    Traceback (most recent call last):
+    ...
+    ValueError: invalid literal for int() with base 10: 'foo'
+    >>> traverse_path_for_sequence_members([], [0])
+    Traceback (most recent call last):
+    ...
+    IndexError: list index out of range
+    """
+    if path is None:
+        path = []
+    if not path_parts:
+        return [document], [tuple(path)]
+    path_parts = list(path_parts)
+    if not isinstance(document, Sequence):
+        return _handle_non_sequence_for_traverse(document, path_parts, path)
+    return _handle_sequence_for_traverse(document, path_parts, path)
+
+
+def _handle_non_sequence_for_traverse(
+    current_document: dict, current_path_parts: list, current_path: list
+) -> Tuple[List[object], List[tuple]]:
+    """
+    Handling a non-sequence member for `traverse_path_for_sequence_members` is like the loop block in `traverse`:
+
+    The next path part is the first part in the list of path parts.
+    The new document is obtained from the current document using the new path part as the key.
+    The next path part is added to the traversed path.
+
+    The traversal continues by recursively calling `traverse_path_for_sequence_members`
+    """
+    part_to_handle = current_path_parts.pop(0)
+    current_document = current_document[part_to_handle]
+    current_path.append(part_to_handle)
+    return traverse_path_for_sequence_members(
+        current_document, current_path_parts, current_path
+    )
+
+
+def _handle_sequence_for_traverse(
+    current_document: Sequence, current_path_parts: list, current_path: list
+) -> Tuple[List[object], List[tuple]]:
+    """
+    Check the next path part for the unpack sequence identifier (i.e. '*'); otherwise resolve it as an index and return.
+
+    The new document is obtained from the current document (a sequence) using the path part, converted to int, as the index.
+    The index is added to the traversed path. NOTE(review): path parts remaining after an explicit index are not traversed here.
+    """
+    sequence_part = current_path_parts.pop(0)
+    if sequence_part == UNPACK_SEQUENCE_IDENTIFIER:
+        return _handle_unpack_sequence_for_traverse(
+            current_document, current_path_parts, current_path
+        )
+    # otherwise, sequence part should be a valid index
+    current_sequence_part = int(sequence_part)
+    current_document = current_document[current_sequence_part]
+    current_path.append(current_sequence_part)
+    return [current_document], [tuple(current_path)]
+
+
+def _handle_unpack_sequence_for_traverse(
+    current_document: Sequence, current_path_parts: list, current_path: list
+) -> Tuple[List[object], List[tuple]]:
+    """
+    When unpacking a sequence, we need to include multiple paths and multiple documents, one for each sequence member.
+
+    For each sequence member:
+    Append the sequence index to a copy of the traversed path, and get the new document.
+    The new document is obtained by traversing the current document using the sequence index.
+    The new document is appended to the list of new documents.
+
+    For each new document:
+    The remaining document is traversed using the remaining path parts.
+    The list of traversed documents and traversed paths are returned.
+    """
+    documents = []
+    resolved_paths = []
+    new_documents = []
+    new_paths = []
+    for sequence_index in range(len(current_document)):
+        new_paths.append(current_path.copy() + [sequence_index])
+        new_document = traverse_path_for_sequence_members(
+            current_document, [sequence_index] + current_path_parts, current_path.copy()
+        )[0]
+        new_documents.extend(new_document)
+    for i in range(len(new_documents)):  # pylint: disable=consider-using-enumerate
+        new_document = new_documents[i]
+        newer_documents, newer_paths = traverse_path_for_sequence_members(
+            new_document, current_path_parts, new_paths[i]
+        )
+        documents.extend(newer_documents)
+        resolved_paths.extend(newer_paths)
+    return documents, resolved_paths
+
+
 def schema_merge(target, src, path):  # noqa: C901 # pylint: disable=R0912
     """Merges the src schema into the target schema in place.
 
diff --git a/tests/contract/test_resource_client.py b/tests/contract/test_resource_client.py
@@ -345,6 +345,174 @@ def test_prune_properties():
     assert document == {"one": "two", "array": ["first"]}
 
 
+def test_prune_properties_for_all_sequence_members():
+    document: dict = {
+        "foo": "bar",
+        "spam": "eggs",
+        "one": "two",
+        "array": ["first", "second"],
+    }
+    prune_properties(
+        document,
+        [
+            ("foo",),  # prune foo: bar
+            ("spam",),  # prune spam: eggs
+            ("not_found",),  # missing members are fine
+            (
+                "not_found",  # missing sequences are fine
+                "*",
+            ),
+            (
+                "array",  # prune members of sequence "array"
+                "*",
+            ),
+        ],
+    )
+    assert document == {"one": "two", "array": []}
+
+
+def test_prune_properties_nested_sequence():
+    document: dict = {
+        "array": [
+            {
+                "outer1": {"inner1": "valueA", "inner2": "valueA"},
+                "outer2": ["valueA", "valueB"],
+            },
+            {
+                "outer1": {"inner1": "valueB", "inner2": "valueB"},
+                "outer2": ["valueC", "valueD"],
+            },
+        ],
+    }
+    prune_properties(
+        document,
+        [
+            (
+                "not_found",
+                "*",
+                "not_found",
+                "*",
+            ),
+            (
+                "array",
+                "*",
+                "outer1",
+                "inner1",
+            ),
+            (
+                "array",
+                "*",
+                "outer2",
+                "*",
+            ),
+        ],
+    )
+    assert document == {
+        "array": [
+            {"outer1": {"inner2": "valueA"}, "outer2": []},
+            {"outer1": {"inner2": "valueB"}, "outer2": []},
+        ]
+    }
+
+
+def test_prune_properties_nested_sequence_2():
+    document: dict = {
+        "array": [
+            {
+                "array2": [{"i1": "A", "i2": "B"}, {"i1": "C", "i2": "D"}],
+                "outer1": {"inner1": "valueA", "inner2": "valueA"},
+                "outer2": ["valueA", "valueB"],
+            },
+            {
+                "array2": [{"i1": "E", "i2": "F"}, {"i1": "G", "i2": "H"}],
+                "outer1": {"inner1": "valueB", "inner2": "valueB"},
+                "outer2": ["valueC", "valueD"],
+            },
+        ],
+    }
+    prune_properties(
+        document,
+        [
+            (
+                "not_found",
+                "*",
+                "not_found",
+                "*",
+            ),
+            (
+                "array",
+                "*",
+                "outer1",
+                "inner1",
+            ),
+            (
+                "array",
+                "*",
+                "outer2",
+                "*",
+            ),
+            (
+                "array",
+                "1",
+                "1",
+                "i1",
+            ),
+        ],
+    )
+    assert document == {
+        "array": [
+            {
+                "array2": [{"i1": "A", "i2": "B"}, {"i1": "C", "i2": "D"}],
+                "outer1": {"inner2": "valueA"},
+                "outer2": [],
+            },
+            {
+                "array2": [{"i1": "E", "i2": "F"}, {"i1": "G", "i2": "H"}],
+                "outer1": {"inner2": "valueB"},
+                "outer2": [],
+            },
+        ]
+    }
+
+
+def test_prune_properties_specific_sequence_indices():
+    document: dict = {
+        "array": [
+            {
+                "outer1": {"inner1": "valueA", "inner2": "valueA"},
+                "outer2": ["valueA", "valueB"],
+            },
+            {
+                "outer1": {"inner1": "valueB", "inner2": "valueB"},
+                "outer2": ["valueC", "valueD"],
+            },
+        ],
+    }
+    prune_properties(
+        document,
+        [
+            (
+                "array",
+                "0",
+                "outer1",
+                "inner1",
+            ),
+            (
+                "array",
+                "1",
+                "outer2",
+                "1",
+            ),
+        ],
+    )
+    assert document == {
+        "array": [
+            {"outer1": {"inner2": "valueA"}, "outer2": ["valueA", "valueB"]},
+            {"outer1": {"inner1": "valueB", "inner2": "valueB"}, "outer2": ["valueC"]},
+        ]
+    }
+
+
 def test_prune_properties_from_model():
     document = {
         "foo": "bar",
diff --git a/tests/jsonutils/test_utils.py b/tests/jsonutils/test_utils.py