Skip to content

Commit 16cbe1b

Browse files
Refactor normalization workflow
1 parent 0086240 commit 16cbe1b

File tree

5 files changed

+295
-92
lines changed

5 files changed

+295
-92
lines changed

README.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,47 @@ df = client.api_to_dataframe(data)
7878
print(df)
7979
```
8080

81+
### Normalization options
82+
83+
`ClientBuilder.api_to_dataframe` accepts the keyword-only arguments `record_path`, `meta`, `errors` and `max_level`, which are
84+
forwarded to [`pandas.json_normalize`](https://pandas.pydata.org/docs/reference/api/pandas.json_normalize.html).
85+
This makes it possible to flatten nested payloads and include metadata
86+
fields easily:
87+
88+
```python
89+
payload = {
90+
"meta": {"page": 1},
91+
"items": [
92+
{"id": 1, "value": "alpha"},
93+
{"id": 2, "value": "beta"},
94+
],
95+
}
96+
97+
client = ClientBuilder(endpoint="https://api.example.com")
98+
99+
df = client.api_to_dataframe(
100+
payload,
101+
record_path="items",
102+
meta=[["meta", "page"]],
103+
errors="ignore", # optionally avoid raising when paths are missing
104+
)
105+
```
106+
107+
When complex transformations are required before normalization, provide a
108+
`transformer` callable to the constructor. The callable receives the JSON
109+
payload passed to `api_to_dataframe` (typically the result of `get_api_data`) and can return any structure supported
110+
by `pandas.json_normalize`:
111+
112+
```python
113+
client = ClientBuilder(
114+
endpoint="https://api.example.com",
115+
transformer=lambda payload: payload["data"],
116+
)
117+
118+
data = client.get_api_data()
119+
df = client.api_to_dataframe(data)
120+
```
121+
81122
## Important notes:
82123
* **Optional Parameters:** The parameters timeout, retry_strategy and headers are optional.
83124
* **Default Params Value:** By default the number of retries is 3 and the delay between retries is 1 second, but you can set both values manually.

src/api_to_dataframe/controller/client_builder.py

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Any, Callable, Optional, Sequence, Union
2+
13
from api_to_dataframe.models.retainer import retry_strategies, Strategies
24
from api_to_dataframe.models.get_data import GetData
35
from api_to_dataframe.utils.logger import logger
@@ -12,6 +14,7 @@ def __init__( # pylint: disable=too-many-positional-arguments,too-many-argument
1214
retries: int = 3,
1315
initial_delay: int = 1,
1416
connection_timeout: int = 1,
17+
transformer: Optional[Callable[[Any], Any]] = None,
1518
):
1619
"""
1720
Initializes the ClientBuilder object.
@@ -23,6 +26,8 @@ def __init__( # pylint: disable=too-many-positional-arguments,too-many-argument
2326
retries (int): The number of times to retry a failed request. Defaults to 3.
2427
initial_delay (int): The delay between retries in seconds. Defaults to 1.
2528
connection_timeout (int): The timeout for the connection in seconds. Defaults to 1.
29+
transformer (Optional[Callable[[Any], Any]]): Optional callable to
30+
transform the JSON payload before DataFrame normalization.
2631
2732
Raises:
2833
ValueError: If endpoint is an empty string.
@@ -56,6 +61,7 @@ def __init__( # pylint: disable=too-many-positional-arguments,too-many-argument
5661
self.headers = headers
5762
self.retries = retries
5863
self.delay = initial_delay
64+
self.transformer = transformer
5965

6066
@retry_strategies
6167
def get_api_data(self):
@@ -77,19 +83,39 @@ def get_api_data(self):
7783

7884
return response.json()
7985

80-
@staticmethod
81-
def api_to_dataframe(response: dict):
82-
"""
83-
Converts an API response to a DataFrame.
84-
85-
This function takes a dictionary response from an API,
86-
uses the `to_dataframe` function from the `GetData` class
87-
to convert it into a DataFrame, and logs the operation as successful.
86+
def api_to_dataframe(
87+
self,
88+
response: Any,
89+
*,
90+
record_path: Optional[Union[str, Sequence[str]]] = None,
91+
meta: Optional[Sequence[Union[str, Sequence[str]]]] = None,
92+
errors: str = "raise",
93+
max_level: Optional[int] = None,
94+
):
95+
"""Normalize an API response into a pandas DataFrame.
8896
8997
Args:
90-
response (dict): The dictionary containing the API response.
98+
response (Any): The already decoded API response payload.
99+
record_path (Optional[Union[str, Sequence[str]]]): Path to nested
100+
records passed to :func:`pandas.json_normalize`.
101+
meta (Optional[Sequence[Union[str, Sequence[str]]]]): Metadata keys to
102+
include as columns in the result.
103+
errors (str): Error handling strategy to forward to
104+
:func:`pandas.json_normalize` ("raise" or "ignore").
105+
max_level (Optional[int]): Maximum depth for record flattening.
91106
92107
Returns:
93-
DataFrame: A pandas DataFrame containing the data from the API response.
108+
pandas.DataFrame: A DataFrame containing the normalized payload.
94109
"""
95-
return GetData.to_dataframe(response)
110+
111+
data = response
112+
if self.transformer is not None:
113+
data = self.transformer(response)
114+
115+
return GetData.to_dataframe(
116+
data,
117+
record_path=record_path,
118+
meta=meta,
119+
errors=errors,
120+
max_level=max_level,
121+
)

src/api_to_dataframe/models/get_data.py

Lines changed: 50 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
import requests
1+
from typing import Any, Optional, Sequence, Union
2+
23
import pandas as pd
4+
import requests
5+
36
from api_to_dataframe.utils.logger import logger
47

58

@@ -15,13 +18,54 @@ def get_response(endpoint: str, headers: dict, connection_timeout: int):
1518
return response
1619

1720
@staticmethod
18-
def to_dataframe(response):
19-
df = pd.DataFrame(response)
21+
def to_dataframe(
22+
response: Any,
23+
*,
24+
record_path: Optional[Union[str, Sequence[str]]] = None,
25+
meta: Optional[Sequence[Union[str, Sequence[str]]]] = None,
26+
errors: str = "raise",
27+
max_level: Optional[int] = None,
28+
) -> pd.DataFrame:
29+
"""Convert an API response object into a pandas DataFrame.
30+
31+
Args:
32+
response (Any): The API response already decoded to a Python object.
33+
record_path (Optional[Union[str, Sequence[str]]]): The path to records for
34+
nested data structures accepted by :func:`pandas.json_normalize`.
35+
meta (Optional[Sequence[Union[str, Sequence[str]]]]): Additional metadata
36+
to include as columns in the resulting DataFrame.
37+
errors (str): Error handling strategy from
38+
:func:`pandas.json_normalize` ("raise" or "ignore").
39+
max_level (Optional[int]): Max depth to normalize nested records.
40+
41+
Returns:
42+
pandas.DataFrame: A DataFrame containing the normalized data.
43+
44+
Raises:
45+
ValueError: If the normalization results in an empty DataFrame while the
46+
error strategy is not set to ``"ignore"``.
47+
"""
48+
49+
if response is None or response == "":
50+
error_msg = "::: Response payload is empty :::"
51+
logger.error(error_msg)
52+
raise ValueError(error_msg)
53+
54+
try:
55+
dataframe = pd.json_normalize(
56+
response,
57+
record_path=record_path,
58+
meta=meta,
59+
errors=errors,
60+
max_level=max_level,
61+
)
62+
except Exception as exc: # pragma: no cover - defensive logging
63+
logger.error("::: Failed to normalize response: %s :::", exc)
64+
raise
2065

21-
# Check if DataFrame is empty
22-
if df.empty:
66+
if dataframe.empty and errors != "ignore":
2367
error_msg = "::: DataFrame is empty :::"
2468
logger.error(error_msg)
2569
raise ValueError(error_msg)
2670

27-
return df
71+
return dataframe

tests/test_controller_client_builder.py

Lines changed: 54 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
1-
import pytest
1+
"""Tests for the ClientBuilder controller."""
2+
23
import pandas as pd
4+
import pytest
35
import responses
46

57
from api_to_dataframe import ClientBuilder, RetryStrategies
68

79

810
@pytest.fixture()
911
def client_setup():
12+
"""Provide a ClientBuilder instance for constructor related tests."""
13+
1014
new_client = ClientBuilder(
1115
endpoint="https://economia.awesomeapi.com.br/last/USD-BRL"
1216
)
@@ -15,13 +19,17 @@ def client_setup():
1519

1620
@pytest.fixture()
1721
def response_setup():
18-
new_client = ClientBuilder(
19-
endpoint="https://economia.awesomeapi.com.br/last/USD-BRL"
20-
)
21-
return new_client.get_api_data()
22+
"""Provide a simple payload used to build DataFrames."""
23+
24+
return [
25+
{"code": "USD", "name": "Dollar"},
26+
{"code": "BRL", "name": "Real"},
27+
]
2228

2329

2430
def test_constructor_raises():
31+
"""Ensure validation errors are raised for invalid constructor arguments."""
32+
2533
with pytest.raises(ValueError):
2634
ClientBuilder(endpoint="")
2735

@@ -59,6 +67,8 @@ def test_constructor_raises():
5967

6068

6169
def test_constructor_with_param(client_setup): # pylint: disable=redefined-outer-name
70+
"""Ensure the endpoint argument is stored on the instance."""
71+
6272
expected_result = "https://economia.awesomeapi.com.br/last/USD-BRL"
6373
new_client = client_setup
6474
assert new_client.endpoint == expected_result
@@ -87,14 +97,29 @@ def test_constructor_with_retry_strategy():
8797
assert client.delay == 2
8898

8999

100+
@responses.activate
90101
def test_response_to_json(client_setup): # pylint: disable=redefined-outer-name
102+
"""Ensure the API response is decoded to JSON."""
103+
91104
new_client = client_setup
105+
endpoint = new_client.endpoint
106+
107+
responses.add(
108+
responses.GET,
109+
endpoint,
110+
json={"status": "ok"},
111+
status=200,
112+
)
113+
92114
response = new_client.get_api_data() # pylint: disable=protected-access
93115
assert isinstance(response, dict)
94116

95117

96118
def test_to_dataframe(response_setup): # pylint: disable=redefined-outer-name
97-
df = ClientBuilder.api_to_dataframe(response_setup)
119+
"""Ensure responses are converted into DataFrames using instance method."""
120+
121+
client = ClientBuilder(endpoint="https://economia.awesomeapi.com.br/last/USD-BRL")
122+
df = client.api_to_dataframe(response_setup)
98123
assert isinstance(df, pd.DataFrame)
99124

100125

@@ -118,3 +143,26 @@ def test_get_api_data_with_mocked_response():
118143
assert response == expected_data
119144
assert len(responses.calls) == 1
120145
assert responses.calls[0].request.url == endpoint
146+
147+
148+
def test_api_to_dataframe_supports_custom_normalization():
149+
"""Ensure normalization parameters are forwarded to the GetData helper."""
150+
151+
payload = {
152+
"meta": {"page": 1},
153+
"items": [
154+
{"id": 1, "name": "First"},
155+
{"id": 2, "name": "Second"},
156+
],
157+
}
158+
159+
client = ClientBuilder(endpoint="https://economia.awesomeapi.com.br/last/USD-BRL")
160+
161+
dataframe = client.api_to_dataframe(
162+
payload,
163+
record_path="items",
164+
meta=[["meta", "page"]],
165+
)
166+
167+
assert list(dataframe.columns) == ["id", "name", "meta.page"]
168+
assert dataframe.iloc[0]["meta.page"] == 1

0 commit comments

Comments
 (0)