diff --git a/CHANGELOG.md b/CHANGELOG.md
index 940058c..35669c7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,6 @@
+### v1.16.1
+- Fix bug in auto pagination logic
+
 ### v1.16.0
 - Added the `entity_seller_name` parameter to the `property_v2.search` endpoint
diff --git a/parcllabs/__version__.py b/parcllabs/__version__.py
index 781742d..d5704ef 100644
--- a/parcllabs/__version__.py
+++ b/parcllabs/__version__.py
@@ -1 +1 @@
-VERSION = "1.16.0"
+VERSION = "1.16.1"
diff --git a/parcllabs/services/properties/property_v2.py b/parcllabs/services/properties/property_v2.py
index 7051eb8..0aa720c 100644
--- a/parcllabs/services/properties/property_v2.py
+++ b/parcllabs/services/properties/property_v2.py
@@ -32,48 +32,46 @@ def _fetch_post(self, params: dict[str, Any], data: dict[str, Any]) -> list[dict
         result = response.json()
         all_data = [result]
+
+        if params["auto_paginate"] is False:
+            return all_data
+
+        # If we need to paginate, use concurrent requests
         pagination = result.get("pagination")
-        metadata = result.get("metadata")
+        if pagination.get("has_more"):
+            print("More pages to fetch, paginating additional pages...")
 
-        if pagination:
             limit = pagination.get("limit")
-            returned_count = metadata.get("results", {}).get("returned_count", 0)
-            # if we got fewer or equal results than requested, don't paginate
-            if returned_count <= limit:
-                return all_data
-
-            # If we need to paginate, use concurrent requests
-            if pagination.get("has_more"):
-                print("More pages to fetch, paginating additional pages...")
-                offset = pagination.get("offset")
-                total_available = metadata.get("results", {}).get("total_available", 0)
-
-                # Calculate how many more pages we need
-                remaining_pages = (total_available - limit) // limit
-                if (total_available - limit) % limit > 0:
-                    remaining_pages += 1
-
-                # Generate all the URLs we need to fetch
-                urls = []
-                current_offset = offset + limit
-                for _ in range(remaining_pages):
-                    urls.append(f"{self.full_post_url}?limit={limit}&offset={current_offset}")
-                    current_offset += limit
-
-                # Use ThreadPoolExecutor to make concurrent requests
-                with ThreadPoolExecutor(max_workers=self.client.num_workers) as executor:
-                    future_to_url = {
-                        executor.submit(self._post, url=url, data=data, params=params): url
-                        for url in urls
-                    }
-
-                    for future in as_completed(future_to_url):
-                        try:
-                            response = future.result()
-                            page_result = response.json()
-                            all_data.append(page_result)
-                        except Exception as exc:
-                            print(f"Request failed: {exc}")
+            offset = pagination.get("offset")
+            metadata = result.get("metadata")
+            total_available = metadata.get("results", {}).get("total_available", 0)
+
+            # Calculate how many more pages we need
+            remaining_pages = (total_available - limit) // limit
+            if (total_available - limit) % limit > 0:
+                remaining_pages += 1
+
+            # Generate all the URLs we need to fetch
+            urls = []
+            current_offset = offset + limit
+            for _ in range(remaining_pages):
+                urls.append(f"{self.full_post_url}?limit={limit}&offset={current_offset}")
+                current_offset += limit
+
+            # Use ThreadPoolExecutor to make concurrent requests
+            with ThreadPoolExecutor(max_workers=self.client.num_workers) as executor:
+                future_to_url = {
+                    executor.submit(self._post, url=url, data=data, params=params): url
+                    for url in urls
+                }
+
+                for future in as_completed(future_to_url):
+                    try:
+                        response = future.result()
+                        page_result = response.json()
+                        all_data.append(page_result)
+                    except Exception as exc:
+                        print(f"Request failed: {exc}")
 
         return all_data
@@ -127,8 +125,6 @@ def _fetch_post_parcl_property_ids(
                 if idx < len(parcl_property_ids_chunks) - 1:  # Don't delay after the last one
                     time.sleep(0.1)
 
-            # Helper functions to abstract raise statements
-
             # Collect results as they complete
             for future in as_completed(future_to_chunk):
                 chunk_num = future_to_chunk[future]
@@ -432,24 +428,20 @@ def _build_owner_filters(self, params: PropertyV2RetrieveParams) -> dict[str, An
         return owner_filters
 
-    def _validate_limit(self, limit: int | None) -> int:
-        """Validate limit parameter."""
+    def _set_limit_pagination(self, limit: int | None) -> tuple[int, bool]:
+        """Validate and set limit and auto pagination."""
         max_limit = RequestLimits.PROPERTY_V2_MAX.value
 
-        # If auto-paginate is enabled or no limit is provided, use maximum limit
-        if limit in (None, 0):
-            print(f"No limit provided. Setting limit to maximum value of {max_limit}.")
-            return max_limit
+        # If no limit is provided, use maximum limit and auto paginate
+        if limit == 0 or limit is None:
+            auto_paginate = True
+            print(f"""No limit provided. Using max limit of {max_limit}.
+                Auto pagination is {auto_paginate}""")
+            return max_limit, auto_paginate
 
-        # If limit exceeds maximum, cap it
-        if limit > max_limit:
-            print(
-                f"Supplied limit value is too large for requested endpoint."
-                f"Setting limit to maximum value of {max_limit}."
-            )
-            return max_limit
-
-        return limit
+        auto_paginate = False
+        print(f"Limit is set at {limit}. Auto pagination is {auto_paginate}")
+        return limit, auto_paginate
 
     def _build_param_categories(
         self, params: PropertyV2RetrieveParams
@@ -609,13 +601,16 @@ def retrieve(
 
         # Set limit
         request_params = input_params.params.copy()
+        request_params["auto_paginate"] = False  # auto_paginate is False by default
 
         # Make request with params
         if data.get(PARCL_PROPERTY_IDS):
             request_params["limit"] = PARCL_PROPERTY_IDS_LIMIT
             results = self._fetch_post_parcl_property_ids(params=request_params, data=data)
         else:
-            request_params["limit"] = self._validate_limit(input_params.limit)
+            request_params["limit"], request_params["auto_paginate"] = self._set_limit_pagination(
+                input_params.limit
+            )
             results = self._fetch_post(params=request_params, data=data)
 
         # Get metadata from results
diff --git a/tests/test_property_v2.py b/tests/test_property_v2.py
index 7f460c2..b1e46bd 100644
--- a/tests/test_property_v2.py
+++ b/tests/test_property_v2.py
@@ -208,12 +208,11 @@ def test_schema_with_none_values() -> None:
 
 
 def test_validate_limit(property_v2_service: PropertyV2Service) -> None:
-    assert property_v2_service._validate_limit(limit=None) == RequestLimits.PROPERTY_V2_MAX.value
-    assert property_v2_service._validate_limit(limit=None) == RequestLimits.PROPERTY_V2_MAX.value
-    assert property_v2_service._validate_limit(limit=100) == 100
-    assert (
-        property_v2_service._validate_limit(limit=1000000000) == RequestLimits.PROPERTY_V2_MAX.value
+    assert property_v2_service._set_limit_pagination(limit=None) == (
+        RequestLimits.PROPERTY_V2_MAX.value,
+        True,
     )
+    assert property_v2_service._set_limit_pagination(limit=100) == (100, False)
 
 
 @patch.object(PropertyV2Service, "_post")
@@ -221,7 +220,7 @@ def test_fetch_post_single_page(
     mock_post: Mock, property_v2_service: PropertyV2Service, mock_response: Mock
 ) -> None:
     mock_post.return_value = mock_response
-    result = property_v2_service._fetch_post(params={}, data={})
+    result = property_v2_service._fetch_post(params={"auto_paginate": False}, data={})
 
     assert len(result) == 1
     assert result[0] == mock_response.json()
@@ -251,7 +250,7 @@ def test_fetch_post_pagination(mock_post: Mock, property_v2_service: PropertyV2S
     # Set up the mock to return different responses
     mock_post.side_effect = [first_response, second_response]
 
-    result = property_v2_service._fetch_post(params={"limit": 1}, data={})
+    result = property_v2_service._fetch_post(params={"limit": 1, "auto_paginate": False}, data={})
 
     assert len(result) == 1
     assert result[0]["data"][0]["parcl_id"] == 123
@@ -311,7 +310,7 @@ def test_retrieve(
 
     # check that the correct data was passed to _fetch_post
     call_args = mock_fetch_post.call_args[1]
-    assert call_args["params"] == {"limit": 10}
+    assert call_args["params"] == {"limit": 10, "auto_paginate": False}
 
     data = call_args["data"]
     assert data["parcl_ids"] == [123]
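
---

Reviewer note: a minimal standalone sketch of the remaining-page arithmetic that the reworked `_fetch_post` relies on when `auto_paginate` is enabled. The helper name `remaining_pages_needed` and the sample numbers are illustrative only; `total_available` and `limit` mirror the pagination metadata fields used in the diff above.

```python
# Illustrative restatement of the ceiling-division page math in _fetch_post.
# The function name is hypothetical; it is not part of the parcllabs SDK.

def remaining_pages_needed(total_available: int, limit: int) -> int:
    """Ceiling-divide the rows left after the first page into limit-sized pages."""
    remaining = total_available - limit  # rows not covered by the first response
    pages = remaining // limit
    if remaining % limit > 0:  # a partial final page still costs one request
        pages += 1
    return pages

# With 250 matching rows and limit=100, the first response holds 100 rows,
# so two more requests (offset=100 and offset=200) fetch the remaining 150.
assert remaining_pages_needed(total_available=250, limit=100) == 2
```

The generated page URLs then step `current_offset` by `limit` starting from `offset + limit`, matching the loop in the hunk above.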