|
| 1 | +# Copyright 2022 Google Inc. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | +""" Data Commons Python API Query Module. |
| 15 | +
|
| 16 | +Implements functions for sending graph queries to the Data Commons Graph. |
| 17 | +""" |
| 18 | + |
| 19 | +from datacommons.requests import _post |
| 20 | + |
| 21 | + |
def query(query_string, select=None):
    """Executes a SPARQL query on the Data Commons graph and returns its rows.

    The query is posted to the `/query` endpoint via `_post`. The response is
    read as a mapping with a `header` entry (the query variables, in column
    order) and an optional `rows` entry; each row's `cells` are paired
    positionally with the header to build one :obj:`dict` per row.

    Args:
      query_string (:obj:`str`): The SPARQL query string.
      select (:obj:`func` accepting a row of the query result): A function
        that selects rows to be returned by :code:`query`. This function
        accepts a row of the results of executing :code:`query_string` and
        returns True if and only if the row is to be returned by
        :code:`query`. The row passed in as an argument is represented as a
        :obj:`dict` that maps a query variable in :code:`query_string` to its
        value in the given row.

    Returns:
      A table, represented as a :obj:`list` of rows, resulting from executing
      the given SPARQL query. Each row is a :obj:`dict` mapping query variable
      to its value in the row. If `select` is not `None`, then a row is
      included in the returned :obj:`list` if and only if `select` returns a
      truthy value for that row.

    Raises:
      ValueError: If the payload returned by the Data Commons REST API is
        malformed: it has no `header`, a row holds more cells than the header
        has variables, or a cell has no `value` entry.

    Examples:
      We would like to query for the name associated with three states
      identified by their dcids
      `California <https://browser.datacommons.org/kg?dcid=geoId/06>`_,
      `Kentucky <https://browser.datacommons.org/kg?dcid=geoId/21>`_, and
      `Maryland <https://browser.datacommons.org/kg?dcid=geoId/24>`_.

      >>> query_str = '''
      ... SELECT ?name ?dcid
      ... WHERE {
      ...   ?a typeOf Place .
      ...   ?a name ?name .
      ...   ?a dcid ("geoId/06" "geoId/21" "geoId/24") .
      ...   ?a dcid ?dcid
      ... }
      ... '''
      >>> result = query(query_str)
      >>> for r in result:
      ...   print(r)
      {'?name': 'Maryland', '?dcid': 'geoId/24'}
      {'?name': 'Kentucky', '?dcid': 'geoId/21'}
      {'?name': 'California', '?dcid': 'geoId/06'}

      Optionally, we can specify which rows are returned by setting
      :code:`select` like so. The following returns all rows where the name is
      "Maryland".

      >>> selector = lambda row: row['?name'] == 'Maryland'
      >>> result = query(query_str, select=selector)
      >>> for r in result:
      ...   print(r)
      {'?name': 'Maryland', '?dcid': 'geoId/24'}
    """
    resp = _post('/query', {'sparql': query_string})

    # The header lists the query variables; without it cells cannot be named.
    header = resp.get('header')
    if header is None:
        raise ValueError('Ill-formatted response: does not contain a header.')

    result_rows = []
    for row in resp.get('rows', []):
        # Construct the map from query variable to cell value.
        row_map = {}
        for idx, cell in enumerate(row.get('cells', [])):
            # `idx` is zero-based, so the last valid header index is
            # len(header) - 1. Using `>=` reports an extra cell as a
            # ValueError instead of letting header[idx] raise IndexError.
            if idx >= len(header):
                raise ValueError(
                    'Query error: unexpected cell {}'.format(cell))
            if 'value' not in cell:
                raise ValueError(
                    'Query error: cell missing value {}'.format(cell))
            row_map[header[idx]] = cell['value']
        # Add the row to the result rows if it is selected.
        if select is None or select(row_map):
            result_rows.append(row_map)
    return result_rows
0 commit comments