Skip to content

Commit 477977f

Browse files
authored
Batch get_stat_all calls if more than QUERY_BATCH_SIZE places. (#155)
Also replace mixer reference with REST API reference.
1 parent b51f73d commit 477977f

File tree

3 files changed

+78
-18
lines changed

3 files changed

+78
-18
lines changed

datacommons/examples/stat_vars.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,37 @@ def call_str(pvs):
126126
dc.get_stat_all(["badPlaceId", "country/FRA"],
127127
["Median_Age_Person", "Count_Person"]))
128128

129+
print('\nSTRESS TEST FOR GET_STAT_ALL')
130+
try:
131+
dc.get_stat_all(
132+
dc.get_places_in(['country/USA'], 'County')['country/USA'], [
133+
'Count_Person_Unemployed', 'Median_Age_Person', 'Count_Death',
134+
'Count_Death_CertainInfectiousParasiticDiseases',
135+
'Count_UnemploymentInsuranceClaim_StateUnemploymentInsurance',
136+
'Count_Worker_NAICSAccommodationFoodServices',
137+
'Count_Household_With0AvailableVehicles',
138+
'Count_Person_WhiteAlone', 'Count_Person_WorkedFullTime',
139+
'Count_Person_Employed', 'Count_Person_EnrolledInSchool',
140+
'Count_Person_EnrolledInGrade9ToGrade12',
141+
'Count_Person_ResidesInGroupQuarters',
142+
'Count_Person_NowMarried',
143+
'RetailDrugDistribution_DrugDistribution_DMethamphetamine',
144+
'Count_Household_With2Person',
145+
'Count_Person_EducationalAttainment2NdGrade',
146+
'Count_Person_EducationalAttainmentGedOrAlternativeCredential',
147+
'Count_Person_EducationalAttainmentKindergarten',
148+
'Count_Person_EducationalAttainmentRegularHighSchoolDiploma',
149+
'Count_Death_DiseasesOfBloodAndBloodFormingOrgansAndImmuneDisorders',
150+
'Count_Household_NoComputer',
151+
'Median_Income_Household_HouseholderRaceHispanicOrLatino,',
152+
'Count_HousingUnit_RenterOccupied',
153+
'Count_Worker_NAICSInformation'
154+
])
155+
except ValueError:
156+
print('Stress test for get_stat_all FAILED!')
157+
else:
158+
print('Stress test for get_stat_all succeeded.')
159+
129160

130161
if __name__ == '__main__':
131162
main()

datacommons/stat_vars.py

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -203,17 +203,46 @@ def get_stat_all(places, stat_vars):
203203
}
204204
"""
205205
url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_all']
206-
req_json = {'stat_vars': stat_vars, 'places': places}
207-
208-
# Send the request
209-
res_json = utils._send_request(url, req_json=req_json, use_payload=False)
210-
211-
if 'placeData' not in res_json:
212-
raise ValueError('No data in response.')
213-
214-
# Unnest the REST response for keys that have single-element values.
215-
place_statvar_series = collections.defaultdict(dict)
216-
for place_dcid, place in res_json['placeData'].items():
217-
for stat_var_dcid, stat_var in place['statVarData'].items():
218-
place_statvar_series[place_dcid][stat_var_dcid] = stat_var
219-
return dict(place_statvar_series)
206+
places = list(places)
207+
# Get number of batches via an arithmetic ceiling trick:
208+
# 11//10 rounds down to 1.
209+
# -11//10 rounds down to -2.
210+
# We floor-divide the negated count, then negate the result to get the ceiling.
211+
batches = -(-len(places) // utils._QUERY_BATCH_SIZE)
212+
res = {}
213+
for i in range(batches):
214+
req_json = {
215+
'stat_vars':
216+
stat_vars,
217+
'places':
218+
places[i * utils._QUERY_BATCH_SIZE:(i + 1) *
219+
utils._QUERY_BATCH_SIZE]
220+
}
221+
# Send the request
222+
res_json = utils._send_request(url,
223+
req_json=req_json,
224+
use_payload=False)
225+
if 'placeData' not in res_json:
226+
# The REST API spec will always return a dictionary under
227+
# placeData, even if no places exist or have no
228+
# data. If no Places are provided, REST will return an
229+
# error, which will have been caught and passed on in
230+
# _send_request.
231+
raise ValueError("Unexpected response from REST stat/all API.")
232+
233+
# Unnest the REST response for keys that have single-element values.
234+
place_statvar_series = collections.defaultdict(dict)
235+
for place_dcid, place in res_json['placeData'].items():
236+
stat_var_data = place.get('statVarData')
237+
if not stat_var_data:
238+
# The REST API spec will always return a dictionary under
239+
# statVarData, even if no StatVars exist or have no
240+
# data. If no StatVars are provided, REST will return an
241+
# error, which will have been caught and passed on in
242+
# _send_request.
243+
raise ValueError("Unexpected response from REST stat/all API.")
244+
for stat_var_dcid, stat_var in stat_var_data.items():
245+
place_statvar_series[place_dcid][stat_var_dcid] = stat_var
246+
res.update(dict(place_statvar_series))
247+
248+
return res

datacommons/utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,12 +111,12 @@ def _send_request(req_url, req_json={}, compress=False, post=True, use_payload=T
111111
res = six.moves.urllib.request.urlopen(req)
112112
except six.moves.urllib.error.HTTPError as e:
113113
raise ValueError(
114-
'Response error: An HTTP {} code was returned by the mixer. Printing '
115-
'response\n\n{}'.format(e.code, e.read()))
114+
'Response error: An HTTP {} code was returned by the REST API. '
115+
'Printing response\n\n{}'.format(e.code, e.read()))
116116
if isinstance(res, six.moves.urllib.error.HTTPError):
117117
raise ValueError(
118-
'Response error: An HTTP {} code was returned by the mixer. Printing '
119-
'response\n\n{}'.format(res.code, res.msg))
118+
'Response error: An HTTP {} code was returned by the REST API. '
119+
'Printing response\n\n{}'.format(res.code, res.msg))
120120
# Get the JSON
121121
res_json = json.loads(res.read())
122122
if not use_payload:

0 commit comments

Comments
 (0)