Skip to content

Commit f262bd3

Browse files
authored
Merge pull request #95 from ParclLabs/fix/limit-pagination
use auto_paginate var to manage pagination
2 parents c3b4dd2 + 2e4c94c commit f262bd3

File tree

4 files changed

+63
-66
lines changed

4 files changed

+63
-66
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
### v1.16.1
2+
- Fix bug in auto pagination logic
3+
14
### v1.16.0
25
- Added the `entity_seller_name` parameter to the `property_v2.search` endpoint
36

parcllabs/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
VERSION = "1.16.0"
1+
VERSION = "1.16.1"

parcllabs/services/properties/property_v2.py

Lines changed: 52 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -32,48 +32,46 @@ def _fetch_post(self, params: dict[str, Any], data: dict[str, Any]) -> list[dict
3232
result = response.json()
3333

3434
all_data = [result]
35+
36+
if params["auto_paginate"] is False:
37+
return all_data
38+
39+
# If we need to paginate, use concurrent requests
3540
pagination = result.get("pagination")
36-
metadata = result.get("metadata")
41+
if pagination.get("has_more"):
42+
print("More pages to fetch, paginating additional pages...")
3743

38-
if pagination:
3944
limit = pagination.get("limit")
40-
returned_count = metadata.get("results", {}).get("returned_count", 0)
41-
# if we got fewer or equal results than requested, don't paginate
42-
if returned_count <= limit:
43-
return all_data
44-
45-
# If we need to paginate, use concurrent requests
46-
if pagination.get("has_more"):
47-
print("More pages to fetch, paginating additional pages...")
48-
offset = pagination.get("offset")
49-
total_available = metadata.get("results", {}).get("total_available", 0)
50-
51-
# Calculate how many more pages we need
52-
remaining_pages = (total_available - limit) // limit
53-
if (total_available - limit) % limit > 0:
54-
remaining_pages += 1
55-
56-
# Generate all the URLs we need to fetch
57-
urls = []
58-
current_offset = offset + limit
59-
for _ in range(remaining_pages):
60-
urls.append(f"{self.full_post_url}?limit={limit}&offset={current_offset}")
61-
current_offset += limit
62-
63-
# Use ThreadPoolExecutor to make concurrent requests
64-
with ThreadPoolExecutor(max_workers=self.client.num_workers) as executor:
65-
future_to_url = {
66-
executor.submit(self._post, url=url, data=data, params=params): url
67-
for url in urls
68-
}
69-
70-
for future in as_completed(future_to_url):
71-
try:
72-
response = future.result()
73-
page_result = response.json()
74-
all_data.append(page_result)
75-
except Exception as exc:
76-
print(f"Request failed: {exc}")
45+
offset = pagination.get("offset")
46+
metadata = result.get("metadata")
47+
total_available = metadata.get("results", {}).get("total_available", 0)
48+
49+
# Calculate how many more pages we need
50+
remaining_pages = (total_available - limit) // limit
51+
if (total_available - limit) % limit > 0:
52+
remaining_pages += 1
53+
54+
# Generate all the URLs we need to fetch
55+
urls = []
56+
current_offset = offset + limit
57+
for _ in range(remaining_pages):
58+
urls.append(f"{self.full_post_url}?limit={limit}&offset={current_offset}")
59+
current_offset += limit
60+
61+
# Use ThreadPoolExecutor to make concurrent requests
62+
with ThreadPoolExecutor(max_workers=self.client.num_workers) as executor:
63+
future_to_url = {
64+
executor.submit(self._post, url=url, data=data, params=params): url
65+
for url in urls
66+
}
67+
68+
for future in as_completed(future_to_url):
69+
try:
70+
response = future.result()
71+
page_result = response.json()
72+
all_data.append(page_result)
73+
except Exception as exc:
74+
print(f"Request failed: {exc}")
7775

7876
return all_data
7977

@@ -127,8 +125,6 @@ def _fetch_post_parcl_property_ids(
127125
if idx < len(parcl_property_ids_chunks) - 1: # Don't delay after the last one
128126
time.sleep(0.1)
129127

130-
# Helper functions to abstract raise statements
131-
132128
# Collect results as they complete
133129
for future in as_completed(future_to_chunk):
134130
chunk_num = future_to_chunk[future]
@@ -432,24 +428,20 @@ def _build_owner_filters(self, params: PropertyV2RetrieveParams) -> dict[str, An
432428

433429
return owner_filters
434430

435-
def _validate_limit(self, limit: int | None) -> int:
436-
"""Validate limit parameter."""
431+
def _set_limit_pagination(self, limit: int | None) -> tuple[int, bool]:
432+
"""Validate and set limit and auto pagination."""
437433
max_limit = RequestLimits.PROPERTY_V2_MAX.value
438434

439-
# If auto-paginate is enabled or no limit is provided, use maximum limit
440-
if limit in (None, 0):
441-
print(f"No limit provided. Setting limit to maximum value of {max_limit}.")
442-
return max_limit
435+
# If no limit is provided, use maximum limit and auto paginate
436+
if limit == 0 or limit is None:
437+
auto_paginate = True
438+
print(f"""No limit provided. Using max limit of {max_limit}.
439+
Auto pagination is {auto_paginate}""")
440+
return max_limit, auto_paginate
443441

444-
# If limit exceeds maximum, cap it
445-
if limit > max_limit:
446-
print(
447-
f"Supplied limit value is too large for requested endpoint."
448-
f"Setting limit to maximum value of {max_limit}."
449-
)
450-
return max_limit
451-
452-
return limit
442+
auto_paginate = False
443+
print(f"Limit is set at {limit}. Auto pagination is {auto_paginate}")
444+
return limit, auto_paginate
453445

454446
def _build_param_categories(
455447
self, params: PropertyV2RetrieveParams
@@ -609,13 +601,16 @@ def retrieve(
609601

610602
# Set limit
611603
request_params = input_params.params.copy()
604+
request_params["auto_paginate"] = False # auto_paginate is False by default
612605

613606
# Make request with params
614607
if data.get(PARCL_PROPERTY_IDS):
615608
request_params["limit"] = PARCL_PROPERTY_IDS_LIMIT
616609
results = self._fetch_post_parcl_property_ids(params=request_params, data=data)
617610
else:
618-
request_params["limit"] = self._validate_limit(input_params.limit)
611+
request_params["limit"], request_params["auto_paginate"] = self._set_limit_pagination(
612+
input_params.limit
613+
)
619614
results = self._fetch_post(params=request_params, data=data)
620615

621616
# Get metadata from results

tests/test_property_v2.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -208,20 +208,19 @@ def test_schema_with_none_values() -> None:
208208

209209

210210
def test_validate_limit(property_v2_service: PropertyV2Service) -> None:
211-
assert property_v2_service._validate_limit(limit=None) == RequestLimits.PROPERTY_V2_MAX.value
212-
assert property_v2_service._validate_limit(limit=None) == RequestLimits.PROPERTY_V2_MAX.value
213-
assert property_v2_service._validate_limit(limit=100) == 100
214-
assert (
215-
property_v2_service._validate_limit(limit=1000000000) == RequestLimits.PROPERTY_V2_MAX.value
211+
assert property_v2_service._set_limit_pagination(limit=None) == (
212+
RequestLimits.PROPERTY_V2_MAX.value,
213+
True,
216214
)
215+
assert property_v2_service._set_limit_pagination(limit=100) == (100, False)
217216

218217

219218
@patch.object(PropertyV2Service, "_post")
220219
def test_fetch_post_single_page(
221220
mock_post: Mock, property_v2_service: PropertyV2Service, mock_response: Mock
222221
) -> None:
223222
mock_post.return_value = mock_response
224-
result = property_v2_service._fetch_post(params={}, data={})
223+
result = property_v2_service._fetch_post(params={"auto_paginate": False}, data={})
225224

226225
assert len(result) == 1
227226
assert result[0] == mock_response.json()
@@ -251,7 +250,7 @@ def test_fetch_post_pagination(mock_post: Mock, property_v2_service: PropertyV2S
251250
# Set up the mock to return different responses
252251
mock_post.side_effect = [first_response, second_response]
253252

254-
result = property_v2_service._fetch_post(params={"limit": 1}, data={})
253+
result = property_v2_service._fetch_post(params={"limit": 1, "auto_paginate": False}, data={})
255254

256255
assert len(result) == 1
257256
assert result[0]["data"][0]["parcl_id"] == 123
@@ -311,7 +310,7 @@ def test_retrieve(
311310

312311
# check that the correct data was passed to _fetch_post
313312
call_args = mock_fetch_post.call_args[1]
314-
assert call_args["params"] == {"limit": 10}
313+
assert call_args["params"] == {"limit": 10, "auto_paginate": False}
315314

316315
data = call_args["data"]
317316
assert data["parcl_ids"] == [123]

0 commit comments

Comments (0)