fix(identity-vault): test our nextPage logic

bheesham · bheesham · commit 014b89752411 · 2025-09-23T16:33:50.000-04:00
Jira: IAM-1793
diff --git a/python-modules/cis_identity_vault/cis_identity_vault/models/user.py b/python-modules/cis_identity_vault/cis_identity_vault/models/user.py
@@ -318,76 +318,14 @@ def all(self):
             users.extend(response["Items"])
         return users
 
-    def _last_evaluated_to_friendly(self, last_evaluated_keys):
-        """
-        Received from Dynamo, and serialized into something our clients can
-        understand (or rather, use: this _should_ be an opaque token to
-        clients).
-
-        When we're paginating through Dynamo, each segment returns a
-        `LastEvaluatedKey`, which we need to specify as `ExclusiveStartKey` in
-        subsequent requests. These `ExclusiveStartKey` is segment-specific,
-        hence the care here to serialize these in the order returned.
-
-        * `None`, indicating that we've completely finished, there are no more
-          results any segment can return;
-        * `list[Optional[Any]]`, indicating that _some_ segments have results
-          left.
-
-        Our clients' pagination logic (at least as seen by various publishers),
-        will consider the query over once we return `None`.
-        """
-        if not last_evaluated_keys:
-            return None
-        next_page = []
-        for last_evaluated_key in last_evaluated_keys:
-            # A signal that the segment is done scanning.
-            if last_evaluated_key is None:
-                id = ""
-            else:
-                id = last_evaluated_key["id"]["S"]
-            next_page.append(id)
-        # If there are at all any segments left with work, then continue.
-        if any(next_page):
-            next_page_raw = ",".join(next_page)
-            return urllib.parse.quote(next_page_raw)
-        else:
-            return None
-
-    def _next_page_to_dynamodb(self, next_page_raw):
-        """
-        Received from _clients_, and deserialized into something our parallel
-        Dynamo code understands.
-
-        A complication here is that we can't reuse `None`, since that would
-        cause a segment to start from the beginning. So, we use a sentinel
-        value of `"done"` to signal to the parallel Dynamo code that we should
-        skip this segment.
-
-        When Dynamo returns `None`, that means _all_ segments are done. If it
-        returns a `list[Optional[Any]]`, that means that we can still make
-        progress on some segments.
-        """
-        if not next_page_raw:
-            return None
-        next_page = urllib.parse.unquote(next_page_raw)
-        exclusive_start_keys = []
-        for last_evaluated_key in next_page.split(","):
-            if last_evaluated_key == "":
-                id = "done"
-            else:
-                id = {"id": {"S": last_evaluated_key}}
-            exclusive_start_keys.append(id)
-        return exclusive_start_keys
-
     def all_filtered(self, connection_method=None, active=None, next_page=None):
         """
         @query_filter str login_method
         Returns a dict of all users filtered by query_filter
         """
 
         projection_expression = "id, primary_email, user_uuid, active"
-        next_page = self._next_page_to_dynamodb(next_page)
+        next_page = next_page_to_dynamodb(next_page)
 
         if connection_method:
             logger.debug("No active filter passed.  Assuming we need all users.")
@@ -407,7 +345,7 @@ def all_filtered(self, connection_method=None, active=None, next_page=None):
             projection_expression=projection_expression,
             exclusive_start_keys=next_page,
         )
-        return dict(users=response["users"], nextPage=self._last_evaluated_to_friendly(response.get("nextPage")))
+        return dict(users=response["users"], nextPage=last_evaluated_to_friendly(response.get("nextPage")))
 
     def find_or_create(self, user_profile):
         profilev2 = json.loads(user_profile["profile"])
@@ -684,3 +622,67 @@ def find_or_create(self, user_profile):
         else:
             result = self.create(user_profile).user_id
         return result
+
+
+def last_evaluated_to_friendly(last_evaluated_keys):
+    """
+    Received from Dynamo, and serialized into something our clients can
+    understand (or rather, use: this _should_ be an opaque token to
+    clients).
+
+    When we're paginating through Dynamo, each segment returns a
+    `LastEvaluatedKey`, which we need to specify as `ExclusiveStartKey` in
+    subsequent requests. Each `ExclusiveStartKey` is segment-specific,
+    hence the care here to serialize these in the order returned.
+
+    * `None`, indicating that we've completely finished, there are no more
+      results any segment can return;
+    * `list[Optional[Any]]`, indicating that _some_ segments have results
+      left.
+
+    Our clients' pagination logic (at least as seen by various publishers),
+    will consider the query over once we return `None`.
+    """
+    if not last_evaluated_keys:
+        return None
+    next_page = []
+    for last_evaluated_key in last_evaluated_keys:
+        # A signal that the segment is done scanning.
+        if last_evaluated_key is None:
+            id = ""
+        else:
+            id = last_evaluated_key["id"]["S"]
+        next_page.append(id)
+    # If there are at all any segments left with work, then continue.
+    if any(next_page):
+        next_page_raw = ",".join(next_page)
+        return urllib.parse.quote(next_page_raw)
+    else:
+        return None
+
+
+def next_page_to_dynamodb(next_page):
+    """
+    Received from _clients_, and deserialized into something our parallel
+    Dynamo code understands.
+
+    A complication here is that `None` is also what a segment receives on the
+    very first request, where it means "start from the beginning". So an empty
+    entry (`""`) in the token is deserialized to `None`, and the parallel
+    Dynamo code skips such segments on any continued (non-first) request.
+
+    When Dynamo returns `None`, that means _all_ segments are done. If it
+    returns a `list[Optional[Any]]`, that means that we can still make
+    progress on some segments.
+    """
+    if not next_page:
+        return None
+    next_page_unquoted = urllib.parse.unquote(next_page)
+    exclusive_start_keys = []
+    for last_evaluated_key in next_page_unquoted.split(","):
+        if last_evaluated_key == "":
+            id = None
+        else:
+            id = {"id": {"S": last_evaluated_key}}
+        exclusive_start_keys.append(id)
+    return exclusive_start_keys
diff --git a/python-modules/cis_identity_vault/cis_identity_vault/parallel_dynamo.py b/python-modules/cis_identity_vault/cis_identity_vault/parallel_dynamo.py
@@ -60,16 +60,17 @@ def scan(
     users = dict()
     last_evaluated_keys = [None] * max_segments
     threads = []
+    start = False
 
-    # If this is the first request, then we'll receive a None from our
-    # caller.
+    # If this is the first request, then we'll receive a None from our caller.
     if exclusive_start_keys is None:
+        start = True
         exclusive_start_keys = [None] * max_segments
 
     # When we're continuing, we signal that a segment has no more work to
-    # complete if it's ESK is "done". If _all_ of the segments have that, then
+    # complete if its ESK is `None`. If _all_ of the segments have that, then
     # we're at the end of our result set.
-    elif all(map(lambda esk: esk == "done", exclusive_start_keys)):
+    if not start and all(map(lambda esk: esk is None, exclusive_start_keys)):
         return dict(users=[], nextPage=None)
 
     for thread_id in range(0, max_segments):
@@ -80,9 +81,9 @@ def scan(
             logger.critical("Someone may be DOSing us or not doing pagination properly.")
             raise
 
-        # If we explicitly read a "done", then this is a signal that the
-        # segment has no more records.
-        if exclusive_start_key == "done":
+        # If we started already and read a `None`, then this is a signal that
+        # the segment has no more records.
+        if not start and exclusive_start_key is None:
             logger.debug(f"skipping thread {thread_id}")
             continue
 
diff --git a/python-modules/cis_identity_vault/tests/test_models_user_next_page.py b/python-modules/cis_identity_vault/tests/test_models_user_next_page.py
@@ -0,0 +1,10 @@
+from cis_identity_vault.models.user import last_evaluated_to_friendly, next_page_to_dynamodb
+
+
+def test_identity():
+    expected = [None, None, {"id": {"S": "deadbeef"}}, {"id": {"S": "feedbeef"}}, None, None]
+    assert expected == next_page_to_dynamodb(last_evaluated_to_friendly(expected))
+
+
+def test_identity_two():
+    assert None == next_page_to_dynamodb(last_evaluated_to_friendly(None))