Skip to content

Commit 56173fc

Browse files
authored
Add client class (#214)
Introduces the `DataCommonsClient` class to interact with the Data Commons API, along with various utility functions and tests.
1 parent b3178e6 commit 56173fc

File tree

9 files changed

+479
-110
lines changed

9 files changed

+479
-110
lines changed

datacommons_client/client.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from typing import Literal, Optional
2+
3+
from datacommons_client.endpoints.base import API
4+
from datacommons_client.endpoints.node import NodeEndpoint
5+
from datacommons_client.endpoints.observation import ObservationEndpoint
6+
from datacommons_client.endpoints.payloads import ObservationDate
7+
from datacommons_client.endpoints.resolve import ResolveEndpoint
8+
from datacommons_client.utils.decorators import requires_pandas
9+
10+
try:
11+
import pandas as pd
12+
except ImportError:
13+
pd = None
14+
15+
16+
class DataCommonsClient:
    """
    A client for interacting with the Data Commons API.

    This class provides convenient access to the V2 Data Commons API endpoints.
    A single `API` instance is created at construction time and shared by
    every endpoint object.

    Attributes:
        api (API): An instance of the API class that handles requests.
        node (NodeEndpoint): Provides access to node-related queries, such as fetching property labels
            and values for individual or multiple nodes in the Data Commons knowledge graph.
        observation (ObservationEndpoint): Handles observation-related queries, allowing retrieval of
            statistical observations associated with entities, variables, and dates (e.g., GDP of California in 2010).
        resolve (ResolveEndpoint): Manages resolution queries to find different DCIDs for entities.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        *,
        dc_instance: Optional[str] = "datacommons.org",
        url: Optional[str] = None,
    ):
        """
        Initializes the DataCommonsClient.

        Args:
            api_key (Optional[str]): The API key for authentication. Defaults to None. Note that
                custom DC instances do not currently require an API key.
            dc_instance (Optional[str]): The Data Commons instance to use. Defaults to "datacommons.org".
            url (Optional[str]): A custom, fully resolved URL for the Data Commons API. Defaults to None.
        """
        # Create an instance of the API class which will be injected to the endpoints
        self.api = API(api_key=api_key, dc_instance=dc_instance, url=url)

        # Create instances of the endpoints; they all share the same API object
        self.node = NodeEndpoint(api=self.api)
        self.observation = ObservationEndpoint(api=self.api)
        self.resolve = ResolveEndpoint(api=self.api)

    @requires_pandas
    def observations_dataframe(
        self,
        variable_dcids: str | list[str],
        date: ObservationDate | str,
        entity_dcids: Literal["all"] | list[str] = "all",
        entity_type: Optional[str] = None,
        parent_entity: Optional[str] = None,
    ) -> "pd.DataFrame":
        """
        Fetches statistical observations and returns them as a Pandas DataFrame.

        The Observation API fetches statistical observations linked to entities and variables
        at a particular date (e.g., "population of USA in 2020", "GDP of California in 2010").

        Args:
            variable_dcids (str | list[str]): One or more variable DCIDs for the observation.
            date (ObservationDate | str): The date for which observations are requested. It can be
                a specific date, "all" to retrieve all observations, or "latest" to get the most recent observations.
            entity_dcids (Literal["all"] | list[str], optional): The entity DCIDs to retrieve data for.
                Defaults to "all". DCIDs must include their type (e.g., "country/GTM" for Guatemala).
            entity_type (Optional[str], optional): The type of entities to filter by when `entity_dcids="all"`.
                Required if `entity_dcids="all"`. Defaults to None.
            parent_entity (Optional[str], optional): The parent entity under which the target entities fall.
                Required if `entity_dcids="all"`, because it is interpolated into the
                entity expression sent to the API. Defaults to None.

        Returns:
            pd.DataFrame: A DataFrame containing the requested observations.

        Raises:
            ValueError: If `entity_dcids` is "all" and `entity_type` or `parent_entity`
                is missing, or if `entity_dcids` is not "all" and either
                `entity_type` or `parent_entity` is provided.
        """
        if entity_dcids == "all":
            if not entity_type:
                raise ValueError(
                    "When 'entity_dcids' is 'all', 'entity_type' must be specified.")

            # Without a parent, the entity expression would be built from the
            # literal string "None" (f"{parent_entity}<-containedInPlace+...").
            if not parent_entity:
                raise ValueError(
                    "When 'entity_dcids' is 'all', 'parent_entity' must be specified.")

            observations = self.observation.fetch_observations_by_entity_type(
                variable_dcids=variable_dcids,
                date=date,
                entity_type=entity_type,
                parent_entity=parent_entity,
            )
        else:
            # The type/parent filters only make sense for the "all" query mode.
            if entity_type or parent_entity:
                raise ValueError(
                    "Specify 'entity_type' and 'parent_entity' only when 'entity_dcids' is 'all'."
                )

            observations = self.observation.fetch_observations_by_entity(
                variable_dcids=variable_dcids,
                date=date,
                entity_dcids=entity_dcids,
            )

        return pd.DataFrame(observations.get_observations_as_records())

datacommons_client/endpoints/observation.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def fetch_observations_by_entity_type(
104104
parent_entity: str,
105105
entity_type: str,
106106
variable_dcids: str | list[str],
107-
):
107+
) -> ObservationResponse:
108108
"""
109109
Fetches all observations for a given entity type.
110110
@@ -143,3 +143,43 @@ def fetch_observations_by_entity_type(
143143
entity_expression=
144144
f"{parent_entity}<-containedInPlace+{{typeOf:{entity_type}}}",
145145
)
146+
147+
def fetch_observations_by_entity(
    self,
    date: ObservationDate | str,
    entity_dcids: str | list[str],
    variable_dcids: str | list[str],
) -> ObservationResponse:
    """
    Fetches observations for one or more explicitly listed entities.

    Unlike `fetch_observations_by_entity_type`, which selects entities through
    a parent/type expression, this method queries the entities given in
    `entity_dcids` directly.

    Args:
        date (ObservationDate | str): The date option for the observations.
            Use 'all' for all dates, 'latest' for the most recent data,
            or provide a date as a string (e.g., "2024").
        entity_dcids (str | list[str]): One or more entity IDs to filter the data.
        variable_dcids (str | list[str]): The variable(s) to fetch observations for.
            This can be a single variable ID or a list of IDs.

    Returns:
        ObservationResponse: The response object containing observations for the specified entities.

    Example:
        To fetch all observations for Nigeria for a specific variable:

        ```python
        api = API()
        ObservationEndpoint(api).fetch_observations_by_entity(
            date="all",
            entity_dcids="country/NGA",
            variable_dcids="sdg/SI_POV_DAY1"
        )
        ```
    """
    return self.fetch(
        variable_dcids=variable_dcids,
        date=date,
        entity_dcids=entity_dcids,
        # Request every member of ObservationSelect.
        select=list(ObservationSelect),
    )

datacommons_client/endpoints/response.py

Lines changed: 3 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
from datacommons_client.models.observation import Variable
1313
from datacommons_client.models.observation import variableDCID
1414
from datacommons_client.models.resolve import Entity
15+
from datacommons_client.utils.data_processing import flatten_properties
16+
from datacommons_client.utils.data_processing import observations_as_records
17+
from datacommons_client.utils.data_processing import unpack_arcs
1518

1619

1720
@dataclass
@@ -71,55 +74,6 @@ def json(self):
7174
return asdict(self)
7275

7376

74-
def flatten_properties(data: Dict[str, Any]) -> Dict[str, Any]:
75-
"""
76-
Flatten the properties of a node response.
77-
78-
Processes a dictionary of node responses, extracting and
79-
simplifying their properties and arcs into a flattened dictionary.
80-
81-
Args:
82-
data (Dict[str, Dict[str, Any]]):
83-
The input dictionary containing node responses. Each node maps to
84-
a dictionary with potential "arcs" and "properties" keys.
85-
86-
Returns:
87-
Dict[str, Any]:
88-
A flattened dictionary where keys are node identifiers, and values
89-
are the simplified properties or nodes.
90-
"""
91-
92-
# Store simplified properties
93-
items = {}
94-
95-
for node, node_data in data.items():
96-
# If arcs are present, process them
97-
if hasattr(node_data, "arcs"):
98-
processed_arcs = _unpack_arcs(node_data.arcs)
99-
if processed_arcs is not None:
100-
items[node] = processed_arcs
101-
continue
102-
103-
# Include properties if present
104-
if hasattr(node_data, "properties"):
105-
items[node] = node_data.properties
106-
107-
return items
108-
109-
110-
def _unpack_arcs(arcs: Dict[str, Any]) -> Any:
111-
"""Simplify the 'arcs' structure."""
112-
if len(arcs) > 1:
113-
# Multiple arcs: return dictionary of property nodes
114-
return {prop: arc_data["nodes"] for prop, arc_data in arcs.items()}
115-
116-
# Single arc: extract first node's data
117-
for property_data in arcs.values():
118-
nodes = property_data.nodes
119-
if nodes is not None:
120-
return nodes if len(nodes) > 1 else nodes[0]
121-
122-
12377
@dataclass
12478
class ObservationResponse:
12579
"""Represents a response from the Observation endpoint of the Data Commons API.
@@ -170,61 +124,6 @@ def get_observations_as_records(self) -> List[Dict[str, Any]]:
170124
facets=self.facets)
171125

172126

173-
def extract_observations(variable: str, entity: str, entity_data: dict,
174-
facet_metadata: dict) -> list[dict]:
175-
"""
176-
Extracts observations for a given variable, entity, and its data.
177-
178-
Args:
179-
variable (str): The variable name.
180-
entity (str): The entity name.
181-
entity_data (dict): Data for the entity, including ordered facets.
182-
facet_metadata (dict): Metadata for facets.
183-
184-
Returns:
185-
list[dict]: A list of observation records.
186-
"""
187-
# Store observation records
188-
records = []
189-
190-
# Extract observations
191-
for facet in entity_data.get("orderedFacets", []):
192-
facet_id = facet.facetId
193-
metadata = facet_metadata.get(facet_id, {})
194-
records.extend({
195-
"date": observation.date,
196-
"entity": entity,
197-
"variable": variable,
198-
"value": observation.value,
199-
"facetId": facet_id,
200-
**asdict(metadata),
201-
} for observation in facet.observations)
202-
return records
203-
204-
205-
def observations_as_records(data: dict, facets: dict) -> list[dict]:
206-
"""
207-
Converts observation data into a list of records.
208-
209-
Args:
210-
data (dict): A mapping of variables to entities and their data.
211-
facets (dict): Facet metadata for the observations.
212-
213-
Returns:
214-
list[dict]: A flattened list of observation records.
215-
"""
216-
return [
217-
record for variable, entities in data.items()
218-
for entity, entity_data in entities.items()
219-
for record in extract_observations(
220-
variable=variable,
221-
entity=entity,
222-
entity_data=entity_data,
223-
facet_metadata=facets,
224-
)
225-
]
226-
227-
228127
@dataclass
229128
class ResolveResponse:
230129
"""Represents a response from the Resolve endpoint of the Data Commons API.

datacommons_client/models/observation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def from_json(cls, json_data: Dict[str, Any]) -> "Variable":
105105
OrderedFacets.from_json(facet_data)
106106
for facet_data in entity_data.get("orderedFacets", {})
107107
]
108-
} for entity, entity_data in json_data["byEntity"].items()
108+
} for entity, entity_data in json_data.get("byEntity", {}).items()
109109
})
110110

111111

datacommons_client/tests/endpoints/test_response.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1-
from datacommons_client.endpoints.response import _unpack_arcs
21
from datacommons_client.endpoints.response import DCResponse
3-
from datacommons_client.endpoints.response import extract_observations
4-
from datacommons_client.endpoints.response import flatten_properties
52
from datacommons_client.endpoints.response import NodeResponse
63
from datacommons_client.endpoints.response import ObservationResponse
74
from datacommons_client.endpoints.response import ResolveResponse
85
from datacommons_client.models.observation import Facet
96
from datacommons_client.models.observation import Observation
107
from datacommons_client.models.observation import OrderedFacets
118
from datacommons_client.models.observation import Variable
9+
from datacommons_client.utils.data_processing import extract_observations
10+
from datacommons_client.utils.data_processing import flatten_properties
11+
from datacommons_client.utils.data_processing import unpack_arcs
1212

1313
### ----- Test DCResponse ----- ###
1414

@@ -153,7 +153,7 @@ def test_unpack_arcs_multiple_properties():
153153
}, # Empty nodes for completeness
154154
}
155155

156-
result = _unpack_arcs(arcs)
156+
result = unpack_arcs(arcs)
157157

158158
# Expected output
159159
expected = {

0 commit comments

Comments
 (0)