diff --git a/README.md b/README.md
index cafb070..60125c6 100644
--- a/README.md
+++ b/README.md
@@ -487,7 +487,10 @@ Gets a list of unique properties and their associated metadata and events based
 2. `parcl_property_ids`
 3. `geo_coordinates` (must provide latitude, longitude, and radius)
 
-Use limit to specify the number of matched properties to return. Set auto_paginate to `True` to retrieve all results, this will override the limit.
+**NOTE:** Use the `limit` parameter to specify the number of matched properties to return. If `limit` is omitted, all matched properties are returned. A practical workflow is to set a small `limit` to inspect a sample of properties first, then repeat the same request without `limit` to retrieve everything.
+
+
+Example request; note that only one of `parcl_ids`, `parcl_property_ids`, or `geo_coordinates` may be provided per request:
 
 ```python
 results, filter_data = client.property_v2.search.retrieve(
@@ -513,8 +516,7 @@ results, filter_data = client.property_v2.search.retrieve(
     max_record_added_date="2024-12-31",
     min_record_added_date="2024-12-13",
     property_types=["SINGLE_FAMILY", "CONDO", "TOWNHOUSE"],
-    limit=10,
-    # auto_paginate=True,
+    limit=100,
 )
 ```
 
@@ -524,4 +526,4 @@ Monitor your API usage and quota limits by calling the `account()` method in the
 ```python
 client = ParclLabsClient(api_key)
 account_info = client.account()
-```
\ No newline at end of file
+```
diff --git a/parcllabs/enums.py b/parcllabs/enums.py
index a54879d..823b066 100644
--- a/parcllabs/enums.py
+++ b/parcllabs/enums.py
@@ -12,7 +12,7 @@ class RequestLimits(Enum):
     MAX_POST = 1000
     DEFAULT_SMALL = 1000
     DEFAULT_LARGE = 10000
-    PROPERTY_V2_MAX = 100000
+    PROPERTY_V2_MAX = 50000
 
 
 class ResponseCodes(Enum):
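Taken together, the README note and the lowered `PROPERTY_V2_MAX` define the intended workflow: sample with an explicit `limit`, then re-run without one to pull everything in capped, paginated pages. A minimal sketch of that workflow, assuming a valid `api_key` as in the README; the `parcl_ids` value and filters below are placeholders:

```python
from parcllabs import ParclLabsClient

client = ParclLabsClient(api_key)

# Sample first: an explicit limit returns at most that many properties.
sample, filter_data = client.property_v2.search.retrieve(
    parcl_ids=[2900187],  # hypothetical market id
    property_types=["SINGLE_FAMILY"],
    limit=100,
)

# Then pull everything: omitting limit requests pages of
# RequestLimits.PROPERTY_V2_MAX (now 50,000) until no results remain.
all_results, filter_data = client.property_v2.search.retrieve(
    parcl_ids=[2900187],
    property_types=["SINGLE_FAMILY"],
)
```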
diff --git a/parcllabs/services/properties/property_v2.py b/parcllabs/services/properties/property_v2.py
index 43a1237..45cb9b0 100644
--- a/parcllabs/services/properties/property_v2.py
+++ b/parcllabs/services/properties/property_v2.py
@@ -13,9 +13,7 @@ class PropertyV2Service(ParclLabsService):
     def __init__(self, *args: object, **kwargs: object) -> None:
         super().__init__(*args, **kwargs)
 
-    def _fetch_post(
-        self, params: dict[str, Any], data: dict[str, Any], auto_paginate: bool
-    ) -> list[dict]:
+    def _fetch_post(self, params: dict[str, Any], data: dict[str, Any]) -> list[dict]:
         """Fetch data using POST request with pagination support."""
         response = self._post(url=self.full_post_url, data=data, params=params)
         result = response.json()
@@ -24,38 +22,45 @@ def _fetch_post(
         metadata = result.get("metadata")
         all_data = [result]
 
-        # If we need to paginate, use concurrent requests
-        if auto_paginate and pagination and pagination.get("has_more"):
+        returned_count = metadata.get("results", {}).get("returned_count", 0)
+
+        if pagination:
             limit = pagination.get("limit")
-            offset = pagination.get("offset")
-            total_count = metadata.get("results", {}).get("total_available", 0)
-
-            # Calculate how many more pages we need
-            remaining_pages = (total_count - limit) // limit
-            if (total_count - limit) % limit > 0:
-                remaining_pages += 1
-
-            # Generate all the URLs we need to fetch
-            urls = []
-            current_offset = offset + limit
-            for _ in range(remaining_pages):
-                urls.append(f"{self.full_post_url}?limit={limit}&offset={current_offset}")
-                current_offset += limit
-
-            # Use ThreadPoolExecutor to make concurrent requests
-            with ThreadPoolExecutor(max_workers=self.client.num_workers) as executor:
-                future_to_url = {
-                    executor.submit(self._post, url=url, data=data, params=params): url
-                    for url in urls
-                }
-
-                for future in as_completed(future_to_url):
-                    try:
-                        response = future.result()
-                        page_result = response.json()
-                        all_data.append(page_result)
-                    except Exception as exc:
-                        print(f"Request failed: {exc}")
+            if returned_count == limit:  # the requested limit was satisfied; stop even if more results exist
+                return all_data
+
+            # If we need to paginate, use concurrent requests
+            if pagination.get("has_more"):
+                print("More results available; fetching additional pages...")
+                offset = pagination.get("offset")
+                total_count = metadata.get("results", {}).get("total_available", 0)
+
+                # Calculate how many more pages we need
+                remaining_pages = (total_count - limit) // limit
+                if (total_count - limit) % limit > 0:
+                    remaining_pages += 1
+
+                # Generate all the URLs we need to fetch
+                urls = []
+                current_offset = offset + limit
+                for _ in range(remaining_pages):
+                    urls.append(f"{self.full_post_url}?limit={limit}&offset={current_offset}")
+                    current_offset += limit
+
+                # Use ThreadPoolExecutor to make concurrent requests
+                with ThreadPoolExecutor(max_workers=self.client.num_workers) as executor:
+                    future_to_url = {
+                        executor.submit(self._post, url=url, data=data, params=params): url
+                        for url in urls
+                    }
+
+                    for future in as_completed(future_to_url):
+                        try:
+                            response = future.result()
+                            page_result = response.json()
+                            all_data.append(page_result)
+                        except Exception as exc:
+                            print(f"Request failed: {exc}")
 
         return all_data
 
@@ -264,14 +269,13 @@ def _build_owner_filters(self, **kwargs: dict) -> dict[str, Any]:
 
         return owner_filters
 
-    def _validate_limit(self, limit: int | None, auto_paginate: bool) -> int:
+    def _validate_limit(self, limit: int | None) -> int:
         """Validate limit parameter."""
         max_limit = RequestLimits.PROPERTY_V2_MAX.value
 
-        # If auto-paginate is enabled or no limit is provided, use maximum limit
-        if auto_paginate or limit is None:
-            if auto_paginate and limit is not None:
-                print(f"Auto-paginate is enabled. Setting limit to maximum value of {max_limit}.")
+        # If no limit is provided, use the maximum limit
+        if limit in (None, 0):
+            print(f"No limit provided. Setting limit to maximum value of {max_limit}.")
             return max_limit
 
         # If limit exceeds maximum, cap it
@@ -315,7 +319,6 @@ def retrieve(
         is_owner_occupied: bool | None = None,
         limit: int | None = None,
         params: Mapping[str, Any] | None = {},
-        auto_paginate: bool = False,
     ) -> tuple[pd.DataFrame, dict[str, Any]]:
         """
         Retrieve property data based on search criteria and filters.
@@ -353,6 +356,8 @@ def retrieve(
         Returns:
             A pandas DataFrame containing the property data.
         """
+        print("Processing property search request...")
+
         # Build search criteria
         data = self._build_search_criteria(
             parcl_ids=parcl_ids,
@@ -378,10 +383,10 @@ def retrieve(
         data["event_filters"] = self._build_event_filters(**kwargs)
         data["owner_filters"] = self._build_owner_filters(**kwargs)
 
-        params["limit"] = self._validate_limit(limit, auto_paginate)
+        params = {**(params or {}), "limit": self._validate_limit(limit)}  # copy; avoids mutating the shared default
 
-        # Make request with pagination
-        results = self._fetch_post(params=params, data=data, auto_paginate=auto_paginate)
+        # Make request; _fetch_post paginates when more results are available
+        results = self._fetch_post(params=params, data=data)
 
         # Get metadata from results
         metadata = self._get_metadata(results)
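For reviewers sanity-checking `_fetch_post`: the page count is a ceiling division written as floor division plus a remainder check, and offsets advance in steps of `limit` starting one page past the first response. A standalone sketch with hypothetical numbers:

```python
# Hypothetical first response: 50,000 results returned, 120,001 available.
limit, offset, total_count = 50_000, 0, 120_001

# ceil((total_count - limit) / limit) without floating point, as in the diff
remaining_pages = (total_count - limit) // limit
if (total_count - limit) % limit > 0:
    remaining_pages += 1

# Offsets for the follow-up page requests
offsets = [offset + limit * (i + 1) for i in range(remaining_pages)]
print(remaining_pages, offsets)  # -> 2 [50000, 100000]
```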
""" + print("Processing property search request...") + # Build search criteria data = self._build_search_criteria( parcl_ids=parcl_ids, @@ -378,10 +383,10 @@ def retrieve( data["event_filters"] = self._build_event_filters(**kwargs) data["owner_filters"] = self._build_owner_filters(**kwargs) - params["limit"] = self._validate_limit(limit, auto_paginate) + params["limit"] = self._validate_limit(limit) - # Make request with pagination - results = self._fetch_post(params=params, data=data, auto_paginate=auto_paginate) + # Make request with params + results = self._fetch_post(params=params, data=data) # Get metadata from results metadata = self._get_metadata(results) diff --git a/tests/test_property_v2.py b/tests/test_property_v2.py index 66a9a2a..4488395 100644 --- a/tests/test_property_v2.py +++ b/tests/test_property_v2.py @@ -141,22 +141,11 @@ def test_build_owner_filters(property_v2_service: PropertyV2Service) -> None: def test_validate_limit(property_v2_service: PropertyV2Service) -> None: + assert property_v2_service._validate_limit(limit=None) == RequestLimits.PROPERTY_V2_MAX.value + assert property_v2_service._validate_limit(limit=None) == RequestLimits.PROPERTY_V2_MAX.value + assert property_v2_service._validate_limit(limit=100) == 100 assert ( - property_v2_service._validate_limit(limit=None, auto_paginate=True) - == RequestLimits.PROPERTY_V2_MAX.value - ) - assert ( - property_v2_service._validate_limit(limit=None, auto_paginate=False) - == RequestLimits.PROPERTY_V2_MAX.value - ) - assert ( - property_v2_service._validate_limit(limit=100, auto_paginate=True) - == RequestLimits.PROPERTY_V2_MAX.value - ) - assert property_v2_service._validate_limit(limit=100, auto_paginate=False) == 100 - assert ( - property_v2_service._validate_limit(limit=1000000000, auto_paginate=True) - == RequestLimits.PROPERTY_V2_MAX.value + property_v2_service._validate_limit(limit=1000000000) == RequestLimits.PROPERTY_V2_MAX.value ) @@ -165,7 +154,7 @@ def test_fetch_post_single_page( mock_post: Mock, property_v2_service: PropertyV2Service, mock_response: Mock ) -> None: mock_post.return_value = mock_response - result = property_v2_service._fetch_post(params={}, data={}, auto_paginate=False) + result = property_v2_service._fetch_post(params={}, data={}) assert len(result) == 1 assert result[0] == mock_response.json() @@ -195,12 +184,11 @@ def test_fetch_post_pagination(mock_post: Mock, property_v2_service: PropertyV2S # Set up the mock to return different responses mock_post.side_effect = [first_response, second_response] - result = property_v2_service._fetch_post(params={"limit": 1}, data={}, auto_paginate=True) + result = property_v2_service._fetch_post(params={"limit": 1}, data={}) - assert len(result) == 2 + assert len(result) == 1 assert result[0]["data"][0]["parcl_id"] == 123 - assert result[1]["data"][0]["parcl_id"] == 456 - assert mock_post.call_count == 2 + assert mock_post.call_count == 1 def test_as_pd_dataframe(property_v2_service: PropertyV2Service, mock_response: Mock) -> None: