Skip to content

Commit 398ddca

Browse files
authored
endpoint /businesses (#34)
* added new features for new endpoint * code refactoring * added readme; code refactoring * bump version * renamed method; code refactoring
1 parent 8e1c5aa commit 398ddca

File tree

6 files changed

+395
-12
lines changed

6 files changed

+395
-12
lines changed

examples/Businesses.md

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Business Details With Python
2+
3+
Retrieves a complete business profile for a specific business by `os_id`, `place_id`, or `google_id`, with optional enrichments via [Outscraper API](https://app.outscraper.cloud/api-docs#tag/businesses--poi/POST/businesses).
4+
5+
## Installation
6+
7+
Python 3+
8+
```bash
9+
pip install outscraper
10+
```
11+
12+
[Link to the Python package page](https://pypi.org/project/outscraper/)
13+
14+
## Initialization
15+
```python
16+
from outscraper import OutscraperClient
17+
18+
client = OutscraperClient(api_key='SECRET_API_KEY')
19+
```
20+
[Link to the profile page to create the API key](https://app.outscraper.cloud/profile)
21+
22+
## Usage
23+
24+
```python
25+
# Get business details by Outscraper ID (os_id):
26+
business = client.businesses.get('os_id')
27+
28+
# Get business details by Google Place ID:
29+
business = client.businesses.get('place_id')
30+
31+
# Get business details by Google Business ID (google_id):
32+
business = client.businesses.get('google_id')
33+
34+
# Request only specific fields (optional):
35+
business = client.businesses.get(
36+
'os_id',
37+
fields=['name', 'phone', 'website', 'address', 'rating', 'reviews']
38+
)
39+
40+
# Search (one page):
41+
from outscraper.schema.businesses import BusinessFilters
42+
43+
filters = BusinessFilters(
44+
country_code='US',
45+
states=['NY'],
46+
cities=['New York', 'Buffalo'],
47+
types=['restaurant', 'cafe'],
48+
has_website=True,
49+
has_phone=True,
50+
business_statuses=['operational'],
51+
)
52+
53+
result = client.businesses.search(
54+
filters=filters,
55+
limit=100,
56+
include_total=False,
57+
fields=[
58+
'name',
59+
'types',
60+
'address',
61+
'state',
62+
'postal_code',
63+
'country',
64+
'website',
65+
'phone',
66+
'rating',
67+
'reviews',
68+
'photo',
69+
]
70+
)
71+
72+
# Search with dict filters (alternative)
73+
result = client.businesses.search(
74+
filters={
75+
'country_code': 'US',
76+
'states': ['NY'],
77+
'types': ['restaurant', 'cafe'],
78+
'has_website': True,
79+
'business_statuses': ['operational'],
80+
},
81+
limit=50
82+
)
83+
84+
# Collect all search parameters in a single dict and unpack it as keyword arguments:
85+
json = {
86+
'limit': 10,
87+
'cursor': None,
88+
'include_total': False,
89+
'fields': ['name', 'types', 'address', 'state', 'postal_code', 'country', 'website', 'phone', 'rating', 'reviews', 'photo'],
90+
'filters': {
91+
"country_code": "US",
92+
"states": [
93+
"NY"
94+
],
95+
"cities": [
96+
"New York",
97+
"Buffalo"
98+
],
99+
"types": [
100+
"restaurant",
101+
"cafe"
102+
],
103+
"has_website": True,
104+
"has_phone": True,
105+
"business_statuses": ["operational"],
106+
}
107+
}
108+
result = client.businesses.search(**json)
109+
110+
# Iterate over all results (auto-pagination)
111+
from outscraper.schema.businesses import BusinessFilters
112+
113+
filters = BusinessFilters(country_code='US', states=['NY'], business_statuses=['operational'])
114+
115+
for business in client.businesses.iter_search(
116+
filters=filters,
117+
limit=100,
118+
fields=['name', 'phone', 'address', 'rating', 'reviews']
119+
):
120+
# business is a dict holding the requested fields of one business record
121+
print(business)
122+
```

outscraper/businesses.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
from __future__ import annotations
2+
from typing import Iterator, Optional, Union, Mapping, Any
3+
4+
from .schema.businesses import BusinessFilters, BusinessSearchResult
5+
6+
7+
# Accepted forms of a `filters` argument: a BusinessFilters instance, a raw mapping, or None.
FiltersLike = Union[BusinessFilters, Mapping[str, Any], None]
8+
9+
10+
class BusinessesAPI:
    '''
    Wrapper around the `/businesses` endpoints.

    Sends requests through the `_request` method of the client object it was
    created with and exposes three operations: `search()` (one page),
    `iter_search()` (auto-pagination) and `get()` (single business).
    '''

    def __init__(self, client: OutscraperClient) -> None:
        self._client = client

    @staticmethod
    def _checked_payload(data: Any, path: str) -> dict:
        '''
        Validate a decoded JSON body and return it unchanged.

        Parameters:
            data (Any): Decoded JSON body of the response.
            path (str): Request path, used only in the error message.

        Returns:
            dict: The same `data` object when it is a dict without an error flag.

        Raises:
            Exception: If `data` is not a dict, or if it carries a truthy `error` key.
        '''

        # The type check must come first: calling .get() on a list or a string
        # would raise AttributeError and hide the real problem.
        if not isinstance(data, dict):
            raise Exception(f'Unexpected response for {path}: {type(data)}')

        if data.get('error'):
            error_message = data.get('errorMessage')
            raise Exception(f'error: {error_message}')

        return data

    def search(self, *, filters: FiltersLike = None, limit: int = 10, cursor: Optional[str] = None, include_total: bool = False,
               fields: Optional[list[str]] = None) -> BusinessSearchResult:
        '''
        Retrieve business records with optional enrichment data.

        This endpoint provides access to millions of business listings with support for
        pagination and selective data enrichment. Use `cursor` from the previous response
        to fetch the next page.

            Parameters:
                filters (BusinessFilters | dict | None): Filtering criteria. You can pass either
                    BusinessFilters (recommended) or a raw dict matching the API schema.
                limit (int): Maximum number of business records to return for this page.
                    Must be in range [1, 1000]. Default: 10.
                cursor (str | None): Cursor for pagination to retrieve the next set of results.
                    Default: None.
                include_total (bool): Whether to include the total count of matching records in the response. This could increase response time.
                    Default: False.
                fields (list[str] | None): List of fields to include in the response. If not specified, all fields will be returned.

            Returns:
                BusinessSearchResult: Page of businesses with pagination info.

            Raises:
                ValueError: If `limit` is outside [1, 1000].
                Exception: If the response is not a JSON object or reports an error.

        See: https://app.outscraper.com/api-docs
        '''

        if not 1 <= limit <= 1000:
            raise ValueError('limit must be in range [1, 1000]')

        if filters is None:
            filters_payload = {}
        elif isinstance(filters, BusinessFilters):
            filters_payload = filters.to_payload()
        else:
            # Copy the mapping so the caller's object is never shared with the payload.
            filters_payload = dict(filters)

        payload = {
            'filters': filters_payload,
            'limit': limit,
            'cursor': cursor,
            'include_total': include_total,
        }
        if fields:
            payload['fields'] = list(fields)

        response = self._client._request('POST', '/businesses', use_handle_response=False, json=payload)
        data = self._checked_payload(response.json(), '/businesses')

        next_cursor = data.get('next_cursor')
        return BusinessSearchResult(
            items=data.get('items') or [],
            next_cursor=next_cursor,
            # A returned cursor implies more pages even when the flag is absent.
            has_more=bool(data.get('has_more')) or bool(next_cursor),
        )

    def iter_search(self, *, filters: FiltersLike = None, limit: int = 10, start_cursor: Optional[str] = None,
                    include_total: bool = False, fields: Optional[list[str]] = None) -> Iterator[dict]:
        '''
        Iterate over businesses across all pages (auto-pagination).

        This is a convenience generator over `search()`:
        - calls search()
        - yields each item from the returned page
        - continues while the page provides a next_cursor

            Parameters:
                filters (BusinessFilters | dict | None): Same as `search()`.
                limit (int): Page size per request. Default: 10.
                start_cursor (str | None): If provided, iteration starts from this cursor.
                    Default: None (start from first page).
                include_total (bool): Passed to `search()` (if supported by API).
                    Default: False.
                fields (list[str] | None): Passed to `search()`.

            Yields:
                item (dict): Each business record from all pages.

        See: https://app.outscraper.com/api-docs
        '''

        cursor = start_cursor

        while True:
            page = self.search(filters=filters,
                               limit=limit,
                               cursor=cursor,
                               include_total=include_total,
                               fields=fields)

            yield from page.items

            # The next page can only be addressed through a cursor. Continuing
            # on `has_more` alone would re-request the first page (cursor=None)
            # forever, so a missing cursor always ends the iteration.
            if not page.next_cursor:
                break

            cursor = page.next_cursor

    def get(self, business_id: str, *, fields: Optional[list[str]] = None) -> dict:
        '''
        Get Business Details

        Retrieves detailed information for a specific business by business_id.
        According to the API docs, business_id can be:
        - os_id
        - place_id
        - google_id

            Parameters:
                business_id (str): Business identifier (os_id, place_id, or google_id).
                fields (list[str] | None): List of fields to include in the response.
                    If not provided, API returns all fields.

            Returns:
                data (dict): business with full details.

            Raises:
                Exception: If the response is not a JSON object or reports an error.

        See: https://app.outscraper.com/api-docs
        '''

        # The field list is sent as one comma-separated query parameter.
        params = {'fields': ','.join(fields)} if fields else None

        path = f'/businesses/{business_id}'
        resp = self._client._request('GET', path, use_handle_response=False, params=params)

        return self._checked_payload(resp.json(), path)

outscraper/client.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from __future__ import annotations
2+
from functools import cached_property
3+
14
import requests
25
from typing import Union, Tuple, Optional
36

@@ -17,13 +20,11 @@ class OutscraperClient(object):
1720
'''
1821

1922

20-
def __init__(self, api_key: str, requests_pause: int = 5) -> None:
23+
def __init__(self, api_key: str) -> None:
2124
self._transport = OutscraperTransport(api_key=api_key)
22-
self._transport._requests_pause = requests_pause
2325

2426
def _request(self, method: str, path: str, *, wait_async: bool = False, async_request: bool = False, use_handle_response: bool = True, **kwargs):
25-
return self._transport.api_request(
26-
method,
27+
return self._transport.api_request(method,
2728
path,
2829
wait_async=wait_async,
2930
async_request=async_request,
@@ -35,13 +36,13 @@ def get_tasks(self, query: str = '', last_id: str = '', page_size: int = 10) ->
3536
'''
3637
Fetch user UI tasks.
3738
38-
Parameters:
39-
query (str): parameter specifies the search query (tag).
40-
last_id (str): parameter specifies the last task ID. It's commonly used in pagination.
41-
page_size (int): parameter specifies the number of items to return.
39+
Parameters:
40+
query (str): parameter specifies the search query (tag).
41+
last_id (str): parameter specifies the last task ID. It's commonly used in pagination.
42+
page_size (int): parameter specifies the number of items to return.
4243
43-
Returns:
44-
tuple[list, bool]: (tasks, has_more)
44+
Returns:
45+
tuple[list, bool]: (tasks, has_more)
4546
4647
See: https://app.outscraper.com/api-docs#tag/Outscraper-Platform-UI/paths/~1tasks/get
4748
'''
@@ -110,6 +111,11 @@ def get_request_archive(self, request_id: str) -> dict:
110111

111112
raise Exception(f'Response status code: {response.status_code}')
112113

114+
@cached_property
def businesses(self):
    '''
    Accessor for the `/businesses` endpoints.

    Builds a BusinessesAPI bound to this client. Because of `cached_property`,
    the object is created on first access and reused afterwards.
    '''
    # Imported here rather than at module level, as in the original.
    from .businesses import BusinessesAPI

    api = BusinessesAPI(self)
    return api
118+
113119
def google_search(self, query: Union[list, str], pages_per_query: int = 1, uule: str = None, language: str = 'en', region: str = None,
114120
fields: Union[list, str] = None, async_request: bool = False, ui: bool = None, webhook: str = None
115121
) -> Union[list, dict]:

0 commit comments

Comments
 (0)