Skip to content

Commit 33013fb

Browse files
authored
Make the batch size and number of batches also reflective of number of stat_vars queried (#157)
* Make the batch size and number of batches also reflective of number of stat_vars queried. Increase number of stat_vars in stress test.
1 parent 477977f commit 33013fb

File tree

2 files changed

+91
-21
lines changed

2 files changed

+91
-21
lines changed

datacommons/examples/stat_vars.py

Lines changed: 78 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -130,27 +130,90 @@ def call_str(pvs):
130130
try:
131131
dc.get_stat_all(
132132
dc.get_places_in(['country/USA'], 'County')['country/USA'], [
133-
'Count_Person_Unemployed', 'Median_Age_Person', 'Count_Death',
134-
'Count_Death_CertainInfectiousParasiticDiseases',
135-
'Count_UnemploymentInsuranceClaim_StateUnemploymentInsurance',
136-
'Count_Worker_NAICSAccommodationFoodServices',
137-
'Count_Household_With0AvailableVehicles',
138-
'Count_Person_WhiteAlone', 'Count_Person_WorkedFullTime',
139-
'Count_Person_Employed', 'Count_Person_EnrolledInSchool',
133+
'Count_Person', 'LandAreaSqMeter',
134+
'PopulationDensityPerSqMeter',
135+
'Count_Person_BlackOrAfricanAmericanAlone',
136+
'PercentBlackOrAfricanAmericanAlone', 'Count_Person_Female',
137+
'Count_Person_Male',
138+
'Count_Person_AmericanIndianAndAlaskaNativeAlone',
139+
'Count_Person_AmericanIndianAndAlaskaNativeAloneOrInCombinationWithOneOrMoreOtherRaces',
140+
'Count_Person_AmericanIndianOrAlaskaNativeAlone',
141+
'Count_Person_AsianAlone',
142+
'Count_Person_AsianAloneOrInCombinationWithOneOrMoreOtherRaces',
143+
'Count_Person_BlackOrAfricanAmericanAloneOrInCombinationWithOneOrMoreOtherRaces',
144+
'Count_Person_HispanicOrLatino',
145+
'Count_Person_NativeHawaiianAndOtherPacificIslanderAlone',
146+
'Count_Person_NativeHawaiianAndOtherPacificIslanderAloneOrInCombinationWithOneOrMoreOtherRaces',
147+
'Count_Person_NativeHawaiianOrOtherPacificIslanderAlone',
148+
'Count_Person_SomeOtherRaceAlone',
149+
'Count_Person_SomeOtherRaceAloneOrInCombinationWithOneOrMoreOtherRaces',
150+
'Count_Person_TwoOrMoreRaces', 'Count_Person_WhiteAlone',
151+
'Count_Person_WhiteAloneNotHispanicOrLatino',
152+
'Count_Person_WhiteAloneOrInCombinationWithOneOrMoreOtherRaces',
153+
'Count_Person_Upto5Years', 'Count_Person_Upto18Years',
154+
'Count_Person_65OrMoreYears', 'Count_Person_75OrMoreYears',
155+
'Count_Person_ForeignBorn',
156+
'Count_Person_USCitizenByNaturalization',
157+
'Count_Person_NotAUSCitizen', 'Count_Person_Nonveteran',
158+
'Count_Person_Veteran', 'Count_Person_NotWorkedFullTime',
159+
'Count_Person_WorkedFullTime', 'Count_Person_Employed',
160+
'Count_Person_Unemployed', 'Count_Person_InLaborForce',
161+
'Count_Person_IncomeOf10000To14999USDollar',
162+
'Count_Person_IncomeOf15000To24999USDollar',
163+
'Count_Person_IncomeOf25000To34999USDollar',
164+
'Count_Person_IncomeOf35000To49999USDollar',
165+
'Count_Person_IncomeOf50000To64999USDollar',
166+
'Count_Person_IncomeOf65000To74999USDollar',
167+
'Count_Person_IncomeOf75000OrMoreUSDollar',
168+
'Count_Person_IncomeOfUpto9999USDollar',
169+
'Count_Person_EnrolledInSchool',
170+
'Count_Person_NotEnrolledInSchool',
171+
'Count_Person_EnrolledInCollegeUndergraduateYears',
172+
'Count_Person_EnrolledInGrade1ToGrade4',
173+
'Count_Person_EnrolledInGrade5ToGrade8',
140174
'Count_Person_EnrolledInGrade9ToGrade12',
141-
'Count_Person_ResidesInGroupQuarters',
142-
'Count_Person_NowMarried',
143-
'RetailDrugDistribution_DrugDistribution_DMethamphetamine',
144-
'Count_Household_With2Person',
175+
'Count_Person_EnrolledInKindergarten',
176+
'Count_Person_EnrolledInNurserySchoolPreschool',
177+
'Count_Person_GraduateOrProfessionalSchool',
178+
'Count_Person_EducationalAttainment10ThGrade',
179+
'Count_Person_EducationalAttainment11ThGrade',
180+
'Count_Person_EducationalAttainment12ThGradeNoDiploma',
181+
'Count_Person_EducationalAttainment1StGrade',
145182
'Count_Person_EducationalAttainment2NdGrade',
183+
'Count_Person_EducationalAttainment3RdGrade',
184+
'Count_Person_EducationalAttainment4ThGrade',
185+
'Count_Person_EducationalAttainment5ThGrade',
186+
'Count_Person_EducationalAttainment6ThGrade',
187+
'Count_Person_EducationalAttainment7ThGrade',
188+
'Count_Person_EducationalAttainment8ThGrade',
189+
'Count_Person_EducationalAttainment9ThGrade',
190+
'Count_Person_EducationalAttainmentAssociatesDegree',
191+
'Count_Person_EducationalAttainmentBachelorsDegree',
192+
'Count_Person_EducationalAttainmentBachelorsDegreeOrHigher',
193+
'Count_Person_EducationalAttainmentDoctorateDegree',
146194
'Count_Person_EducationalAttainmentGedOrAlternativeCredential',
147195
'Count_Person_EducationalAttainmentKindergarten',
196+
'Count_Person_EducationalAttainmentMastersDegree',
197+
'Count_Person_EducationalAttainmentNoSchoolingCompleted',
198+
'Count_Person_EducationalAttainmentNurserySchool',
199+
'Count_Person_EducationalAttainmentPrimarySchool',
200+
'Count_Person_EducationalAttainmentProfessionalSchoolDegree',
148201
'Count_Person_EducationalAttainmentRegularHighSchoolDiploma',
202+
'Count_Person_EducationalAttainmentSomeCollege1OrMoreYearsNoDegree',
203+
'Count_Person_EducationalAttainmentSomeCollegeLessThan1Year',
204+
'Count_Person_Divorced', 'Count_Person_MarriedAndNotSeparated',
205+
'Count_Person_NeverMarried', 'Count_Person_Separated',
206+
'Count_Person_Widowed', 'Count_Person_NowMarried',
207+
'Count_Person_AbovePovertyLevelInThePast12Months',
208+
'Count_Person_BelowPovertyLevelInThePast12Months',
209+
'Percent_Person_20OrMoreYears_WithDiabetes',
210+
'Percent_Person_20OrMoreYears_Obesity',
211+
'Percent_Person_20OrMoreYears_PhysicalInactivity',
212+
'Percent_Person_Upto64Years_NoHealthInsurance',
213+
'Median_Age_Person', 'Median_Income_Person', 'Count_Death',
214+
'Count_Death_CertainInfectiousParasiticDiseases',
149215
'Count_Death_DiseasesOfBloodAndBloodFormingOrgansAndImmuneDisorders',
150-
'Count_Household_NoComputer',
151-
'Median_Income_Household_HouseholderRaceHispanicOrLatino,',
152-
'Count_HousingUnit_RenterOccupied',
153-
'Count_Worker_NAICSInformation'
216+
'Count_Death_DiseasesOfTheRespiratorySystem'
154217
])
155218
except ValueError:
156219
print('Stress test for get_stat_all FAILED!')

datacommons/stat_vars.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525

2626
import datacommons.utils as utils
2727

28+
# stat_var-specific batch size: target number of (place, stat_var) entries per request.
29+
_STAT_BATCH_SIZE = 2000
30+
2831

2932
def get_stat_value(place,
3033
stat_var,
@@ -203,20 +206,24 @@ def get_stat_all(places, stat_vars):
203206
}
204207
"""
205208
url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_all']
209+
# Cast iterable-like to list.
206210
places = list(places)
211+
stat_vars = list(stat_vars)
212+
213+
# Aim for roughly _STAT_BATCH_SIZE (place, stat_var) entries per request.
214+
# _STAT_BATCH_SIZE = num places x num stat_vars, so aim for
215+
# _STAT_BATCH_SIZE/len(stat_vars) places per batch.
216+
places_per_batch = _STAT_BATCH_SIZE // len(stat_vars)
207217
# Get number of batches via an arithmetic ceiling trick:
208218
# 11//10 rounds down to 1.
209219
# -11//10 rounds down to -2.
210220
# So negate the numerator, floor-divide, then negate the result to get the ceiling.
211-
batches = -(-len(places) // utils._QUERY_BATCH_SIZE)
221+
batches = -(-len(places) // places_per_batch)
212222
res = {}
213223
for i in range(batches):
214224
req_json = {
215-
'stat_vars':
216-
stat_vars,
217-
'places':
218-
places[i * utils._QUERY_BATCH_SIZE:(i + 1) *
219-
utils._QUERY_BATCH_SIZE]
225+
'stat_vars': stat_vars,
226+
'places': places[i * places_per_batch:(i + 1) * places_per_batch]
220227
}
221228
# Send the request
222229
res_json = utils._send_request(url,

0 commit comments

Comments
 (0)