Skip to content

Commit 36d0496

Browse files
authored
[V2] Add node APIs and clean up packaging (#182)
* [V2] Add node APIs and clean up packaging * clean up * update * fix * return value typing * resolve comments
1 parent 4c50974 commit 36d0496

File tree

20 files changed

+526
-211
lines changed

20 files changed

+526
-211
lines changed

.github/ISSUE_TEMPLATE/python-api-v1-0-0-feedback.md

Lines changed: 0 additions & 10 deletions
This file was deleted.

README.md

Lines changed: 0 additions & 124 deletions
This file was deleted.

datacommons/README.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Data Commons Python API
2+
3+
This is a Python library for accessing data in the Data Commons Graph.
4+
5+
> See also: [Data Commons Pandas API](../datacommons_pandas/README.md).
6+
7+
To get started, install this package from pip.
8+
9+
```bash
10+
pip install datacommons
11+
```
12+
13+
Once the package is installed, import `datacommons`.
14+
15+
```python
16+
import datacommons as dc
17+
```
18+
19+
For more detail on getting started with the API, please visit our
20+
[API Overview](https://docs.datacommons.org/api/).
21+
22+
When you are ready to use the API, you can refer to `examples` for
23+
sample code showing how to use this package to perform various tasks. More tutorials and
24+
documentation can be found on our [tutorials page](https://docs.datacommons.org/tutorials/)!
25+
26+
## About Data Commons
27+
28+
[Data Commons](https://datacommons.org/) is an open knowledge repository that
29+
provides a unified view across multiple public data sets and statistics. You can
30+
view what [datasets](https://datacommons.org/datasets) are currently ingested
31+
and browse the graph using our [browser](https://datacommons.org/browser).
32+
33+
## License
34+
35+
Apache 2.0
36+
37+
## Support
38+
39+
For general questions or issues about the API, please open an issue on our
40+
[issues](https://github.com/google/datacommons/issues) page. For all other
41+
questions, please send an email to `support@datacommons.org`.

datacommons/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,5 @@
2929
from datacommons.places import get_places_in, get_related_places, get_stats
3030
from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all
3131

32-
# Other utilities
33-
from datacommons.utils import set_api_key
32+
from datacommons.key import set_api_key
33+
from datacommons.node import properties, property_values, triples

datacommons/key.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright 2022 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
""" API key related functions.
15+
"""
16+
17+
import os
18+
19+
# Environment variable for API key.
20+
_KEY_ENV = 'DC_API_KEY'
21+
22+
23+
def set_api_key(api_key):
    """Stores the given API key in this process's environment.

    The key is written to the environment variable named by `_KEY_ENV`.

    Args:
        api_key: The API key string to store.
    """
    os.environ[_KEY_ENV] = api_key
25+
26+
27+
def get_api_key():
    """Reads the API key from this process's environment.

    Returns:
        The value of the environment variable named by `_KEY_ENV`, or an
        empty string when that variable is not set.
    """
    return os.getenv(_KEY_ENV, '')

datacommons/node.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# Copyright 2022 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
""" API to request node information.
15+
"""
16+
17+
from typing import Dict, List
18+
19+
from datacommons.requests import _post
20+
from datacommons.utils import _get_direction
21+
22+
23+
def properties(nodes: List[str], is_out: bool = True) -> Dict[str, List[str]]:
    """Fetches the property labels attached to each of the given nodes.

    Only the property labels are returned, not the property values.

    Args:
        nodes: List of node DCIDs to look up.
        is_out: Whether to return outgoing properties. The flag is passed to
            `_get_direction` to build the last segment of the endpoint path.

    Returns:
        A dict keyed by the node DCID of each response entry, whose value is
        that entry's list of properties. An entry without a 'properties'
        field maps to an empty list; a response without a 'data' field
        yields an empty dict.
    """
    endpoint = f'/v1/bulk/properties/{_get_direction(is_out)}'
    response = _post(endpoint, {'nodes': nodes})
    return {
        entry['node']: entry.get('properties', [])
        for entry in response.get('data', [])
    }
42+
43+
44+
def property_values(nodes: List[str],
                    property: str,
                    is_out: bool = True) -> Dict[str, List[str]]:
    """Fetches the values of one property for each of the given nodes.

    Args:
        nodes: List of node DCIDs to look up.
        property: The property label to query for.
        is_out: Whether the property is outgoing. The flag is passed to
            `_get_direction` to build the last segment of the endpoint path.

    Returns:
        A dict keyed by the node DCID of each response entry. Each value is a
        list with one element per returned value: the value's 'dcid' field
        when it has one, otherwise its 'value' field. An entry without a
        'values' field maps to an empty list.
    """
    endpoint = f'/v1/bulk/property/values/{_get_direction(is_out)}'
    response = _post(endpoint, {'nodes': nodes, 'property': property})
    values_by_node = {}
    for entry in response.get('data', []):
        node_id = entry['node']
        # Prefer the DCID when the value references another node; fall back
        # to the 'value' field otherwise.
        values_by_node[node_id] = [
            value['dcid'] if 'dcid' in value else value['value']
            for value in entry.get('values', [])
        ]
    return values_by_node
70+
71+
72+
def triples(nodes: List[str],
            is_out: bool = True) -> Dict[str, Dict[str, List[object]]]:
    """Fetches the triples for each of the given nodes.

    Args:
        nodes: List of node DCIDs to look up.
        is_out: Whether the returned properties are outgoing for the queried
            nodes. The flag is passed to `_get_direction` to build the last
            segment of the endpoint path.

    Returns:
        A two-level dict keyed first by the node DCID of each response entry
        and then by property. Each inner value is the list found under that
        property's 'nodes' field, returned as-is (an empty list when the
        field is absent). An entry without a 'triples' field maps to an
        empty dict.
    """
    response = _post(f'/v1/bulk/triples/{_get_direction(is_out)}',
                     data={'nodes': nodes})
    triples_by_node = {}
    for entry in response.get('data', []):
        node_id = entry['node']
        arcs = entry.get('triples', {})
        triples_by_node[node_id] = {
            label: targets.get('nodes', [])
            for label, targets in arcs.items()
        }
    return triples_by_node

datacommons/requests.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Copyright 2022 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
""" Send http requests to Data Commons REST API endpoints.
15+
"""
16+
17+
import requests
18+
from typing import Dict
19+
20+
import datacommons.key as key
21+
22+
# REST API endpoint root
23+
_API_ROOT = "https://api.datacommons.org"
24+
25+
26+
def _post(path: str, data=None) -> Dict:
    """Sends a JSON POST request to a Data Commons REST API endpoint.

    The request carries a JSON content-type header and, when `key.get_api_key()`
    returns a non-empty value, an 'x-api-key' header with that value.

    Args:
        path: Endpoint path; it is appended to `_API_ROOT` to form the URL.
        data: JSON-serializable request payload (a dict). When omitted, an
            empty JSON object is sent.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the response status code is not 200 (the message holds
            the status code, the reason and the server's error message), or
            if the request timed out.
        requests.exceptions.RequestException: Any other request failure is
            propagated unchanged.
    """
    url = _API_ROOT + path
    headers = {'Content-Type': 'application/json'}
    api_key = key.get_api_key()
    if api_key:
        headers['x-api-key'] = api_key
    # Use None as the default instead of a shared mutable dict; an omitted
    # payload is still sent as an empty JSON object, as before.
    payload = {} if data is None else data
    try:
        resp = requests.post(url, json=payload, headers=headers)
    except requests.exceptions.Timeout as e:
        raise Exception('Data request timed out, please try again.') from e
    if resp.status_code != 200:
        # The error body is not guaranteed to be a JSON object with a
        # 'message' field (e.g. an HTML page from a gateway). Fall back to the
        # raw text so the HTTP status is never hidden by a parsing error.
        try:
            message = resp.json()['message']
        except (ValueError, KeyError, TypeError):
            message = resp.text
        raise Exception(f'{resp.status_code}: {resp.reason}\n{message}')
    return resp.json()

0 commit comments

Comments
 (0)