Skip to content

Commit c0a41d8

Browse files
authored
Update Sparql query API to Python3 (#185)
* Update Sparql query API to Python3 * format * fix comments
1 parent 36d0496 commit c0a41d8

File tree

9 files changed

+243
-395
lines changed

9 files changed

+243
-395
lines changed

datacommons/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
##############################################################################
2323

2424
# Data Commons SPARQL query support
25-
from datacommons.query import query
25+
from datacommons.sparql import query
2626

2727
# Data Commons Python API
2828
from datacommons.core import get_property_labels, get_property_values, get_triples

datacommons/query.py

Lines changed: 0 additions & 129 deletions
This file was deleted.

datacommons/sparql.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# Copyright 2022 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
""" Data Commons Python API Query Module.
15+
16+
Implements functions for sending graph queries to the Data Commons Graph.
17+
"""
18+
19+
from datacommons.requests import _post
20+
21+
22+
def query(query_string, select=None):
  """Returns the results of executing a SPARQL query on the Data Commons graph.

  The query is posted to the `/query` endpoint and the tabular response
  (a `header` list plus `rows` of `cells`) is converted into a list of dicts
  keyed by query variable.

  Args:
    query_string (:obj:`str`): The SPARQL query string.
    select (:obj:`func` accepting a row of the query result): A function that
      selects rows to be returned by :code:`query`. This function accepts a row
      on the results of executing :code:`query_string` and returns True if and
      only if the row is to be returned by :code:`query`. The row passed in as
      an argument is represented as a :obj:`dict` that maps a query variable in
      :code:`query_string` to its value in the given row.

  Returns:
    A table, represented as a :obj:`list` of rows, resulting from executing the
    given SPARQL query. Each row is a :obj:`dict` mapping query variable to its
    value in the row. If `select` is not `None`, then a row is included in the
    returned :obj:`list` if and only if `select` returns :obj:`True` for that
    row.

  Raises:
    ValueError: If the payload returned by the Data Commons REST API is
      malformed: the response has no `header`, a row contains more cells than
      the header has variables, or a cell has no `value`.

  Examples:
    We would like to query for the name associated with three states identified
    by their dcids
    `California <https://browser.datacommons.org/kg?dcid=geoId/06>`_,
    `Kentucky <https://browser.datacommons.org/kg?dcid=geoId/21>`_, and
    `Maryland <https://browser.datacommons.org/kg?dcid=geoId/24>`_.

    >>> query_str = '''
    ... SELECT ?name ?dcid
    ... WHERE {
    ...   ?a typeOf Place .
    ...   ?a name ?name .
    ...   ?a dcid ("geoId/06" "geoId/21" "geoId/24") .
    ...   ?a dcid ?dcid
    ... }
    ... '''
    >>> result = query(query_str)
    >>> for r in result:
    ...   print(r)
    {'?name': 'Maryland', '?dcid': 'geoId/24'}
    {'?name': 'Kentucky', '?dcid': 'geoId/21'}
    {'?name': 'California', '?dcid': 'geoId/06'}

    Optionally, we can specify which rows are returned by setting :code:`select`
    like so. The following returns all rows where the name is "Maryland".

    >>> selector = lambda row: row['?name'] == 'Maryland'
    >>> result = query(query_str, select=selector)
    >>> for r in result:
    ...   print(r)
    {'?name': 'Maryland', '?dcid': 'geoId/24'}
  """
  resp = _post('/query', {'sparql': query_string})

  # The header lists the query variables, in the same order as each row's cells.
  header = resp.get('header')
  if header is None:
    raise ValueError('Ill-formatted response: does not contain a header.')

  result_rows = []
  for row in resp.get('rows', []):
    # Construct the map from query variable to cell value.
    row_map = {}
    for idx, cell in enumerate(row.get('cells', [])):
      # Valid header indices are 0..len(header)-1, so idx == len(header) is
      # already one cell too many; reject it here rather than letting
      # header[idx] fail with an IndexError below.
      if idx >= len(header):
        raise ValueError('Query error: unexpected cell {}'.format(cell))
      if 'value' not in cell:
        raise ValueError(
            'Query error: cell missing value {}'.format(cell))
      row_map[header[idx]] = cell['value']
    # Add the row to the result rows if it is selected.
    if select is None or select(row_map):
      result_rows.append(row_map)
  return result_rows

datacommons/test/node_test.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@
1313
# limitations under the License.
1414

1515
import unittest
16-
from unittest import mock
16+
from unittest.mock import patch
1717

1818
import datacommons
1919

2020

2121
class TestProperties(unittest.TestCase):
2222

23-
@mock.patch("datacommons.node._post")
23+
@patch("datacommons.node._post")
2424
def test_with_data(self, _post):
2525

2626
def side_effect(path, data):
@@ -46,7 +46,7 @@ def side_effect(path, data):
4646

4747
class TestPropertyValues(unittest.TestCase):
4848

49-
@mock.patch("datacommons.node._post")
49+
@patch("datacommons.node._post")
5050
def test_with_data(self, _post):
5151

5252
def side_effect(path, data):
@@ -70,7 +70,7 @@ def side_effect(path, data):
7070
response = datacommons.property_values(["geoId/06"], "name")
7171
assert response == {"geoId/06": ["California"]}
7272

73-
@mock.patch("datacommons.node._post")
73+
@patch("datacommons.node._post")
7474
def test_multiple_values(self, _post):
7575

7676
def side_effect(path, data):
@@ -100,7 +100,7 @@ def side_effect(path, data):
100100

101101
class TestTriples(unittest.TestCase):
102102

103-
@mock.patch("datacommons.node._post")
103+
@patch("datacommons.node._post")
104104
def test_with_data(self, _post):
105105

106106
def side_effect(path, data):

0 commit comments

Comments
 (0)