|
| 1 | +from typing import Literal, Optional |
| 2 | + |
| 3 | +from datacommons_client.endpoints.base import API |
| 4 | +from datacommons_client.endpoints.node import NodeEndpoint |
| 5 | +from datacommons_client.endpoints.observation import ObservationEndpoint |
| 6 | +from datacommons_client.endpoints.payloads import ObservationDate |
| 7 | +from datacommons_client.endpoints.resolve import ResolveEndpoint |
| 8 | +from datacommons_client.utils.decorators import requires_pandas |
| 9 | + |
| 10 | +try: |
| 11 | + import pandas as pd |
| 12 | +except ImportError: |
| 13 | + pd = None |
| 14 | + |
| 15 | + |
class DataCommonsClient:
  """
  Entry point for talking to the Data Commons V2 API.

  A single shared `API` object is built at construction time and handed to
  every endpoint wrapper, so all endpoints use the same configuration.

  Attributes:
      api (API): The request handler shared by all endpoints.
      node (NodeEndpoint): Node queries, such as fetching property labels and
          values for one or more nodes in the Data Commons knowledge graph.
      observation (ObservationEndpoint): Observation queries, i.e. statistical
          observations tied to entities, variables, and dates
          (e.g., GDP of California in 2010).
      resolve (ResolveEndpoint): Resolution queries used to find DCIDs for entities.
  """

  def __init__(
      self,
      api_key: Optional[str] = None,
      *,
      dc_instance: Optional[str] = "datacommons.org",
      url: Optional[str] = None,
  ):
    """
    Builds the shared API handler and the endpoint wrappers.

    Args:
        api_key (Optional[str]): API key used for authentication. Defaults to None.
            Custom DC instances do not currently require an API key.
        dc_instance (Optional[str]): The Data Commons instance to target.
            Defaults to "datacommons.org".
        url (Optional[str]): A custom, fully resolved URL for the Data Commons API.
            Defaults to None.
    """
    # One API object, injected into each endpoint below.
    self.api = API(api_key=api_key, dc_instance=dc_instance, url=url)

    self.node = NodeEndpoint(api=self.api)
    self.observation = ObservationEndpoint(api=self.api)
    self.resolve = ResolveEndpoint(api=self.api)

  @requires_pandas
  def observations_dataframe(
      self,
      variable_dcids: str | list[str],
      date: ObservationDate | str,
      entity_dcids: Literal["all"] | list[str] = "all",
      entity_type: Optional[str] = None,
      parent_entity: Optional[str] = None,
  ):
    """
    Retrieves statistical observations as a Pandas DataFrame.

    Observations link entities and variables at a given date
    (e.g., "population of USA in 2020", "GDP of California in 2010").

    Args:
        variable_dcids (str | list[str]): One or more variable DCIDs to fetch.
        date (ObservationDate | str): The date of interest. Either a specific date,
            "all" for every observation, or "latest" for the most recent ones.
        entity_dcids (Literal["all"] | list[str], optional): The entity DCIDs to
            fetch data for. Defaults to "all". DCIDs must include their type
            (e.g "country/GTM" for Guatemala).
        entity_type (Optional[str], optional): The entity type to filter by.
            Required when `entity_dcids="all"`; rejected otherwise. Defaults to None.
        parent_entity (Optional[str], optional): The parent entity under which the
            target entities fall. Accepted only when `entity_dcids="all"`.
            Defaults to None.

    Returns:
        pd.DataFrame: A DataFrame built from the observation records.

    Raises:
        ValueError: If `entity_dcids` is "all" and `entity_type` is missing, or if
            `entity_type` / `parent_entity` is given alongside explicit entity DCIDs.
    """
    # Evaluate the sentinel once; it drives both validation and dispatch.
    fetch_by_type = entity_dcids == "all"

    if fetch_by_type:
      if not entity_type:
        raise ValueError(
            "When 'entity_dcids' is 'all', 'entity_type' must be specified.")

      response = self.observation.fetch_observations_by_entity_type(
          date=date,
          parent_entity=parent_entity,
          entity_type=entity_type,
          variable_dcids=variable_dcids,
      )
    else:
      # Type/parent filters are meaningless once explicit DCIDs are given.
      if entity_type or parent_entity:
        raise ValueError(
            "Specify 'entity_type' and 'parent_entity' only when 'entity_dcids' is 'all'."
        )

      response = self.observation.fetch_observations_by_entity(
          date=date,
          entity_dcids=entity_dcids,
          variable_dcids=variable_dcids,
      )

    records = response.get_observations_as_records()
    return pd.DataFrame(records)
0 commit comments