diff --git a/Database/Telework.yaml b/Database/Telework.yaml new file mode 100644 index 0000000..ebbd07a --- /dev/null +++ b/Database/Telework.yaml @@ -0,0 +1,38 @@ +# ---------------- Work from home module inputs (trip generation) +validationfiles: N +wfhpctlow: 0.143 +wfhpctmedium: 0.232 +wfhpcthigh: 0.552 +# ---------------- Telework worker attributes by edu, income and children +# WFHL: # low wfh percent group +# edu: +# 1: 0.305 # low wfhGroup less than Bachelor +# 2: 0.695 # low wfhGroup Bachelor Plus +# inc: +# 1: 0.353 # low wfhGroup inc less than 100K +# 2: 0.647 # low wfhGroup inc 100K Plus +# chi: +# 1: 0.37 # low wfhGroup children under 18 +# 2: 0.63 # low wfhGroup no children under 18 + +# WFHM: # medium wfh percent group +# edu: +# 1: 0.268 # medium wfhGroup less than Bachelor +# 2: 0.732 # medium wfhGroup Bachelor Plus +# inc: +# 1: 0.418 # medium wfhGroup inc less than 100K +# 2: 0.582 # medium wfhGroup inc 100K Plus +# chi: +# 1: 0.354 # medium wfhGroup children under 18 +# 2: 0.646 # medium wfhGroup no children under 18 + +# WFHH: # high wfh percent group +# edu: +# 1: 0.202 # high wfhGroup less than Bachelor +# 2: 0.798 # high wfhGroup Bachelor Plus +# inc: +# 1: 0.231 # high wfhGroup inc less than 100K +# 2: 0.769 # high wfhGroup inc 100K Plus +# chi: +# 1: 0.405 # high wfhGroup children under 18 +# 2: 0.595 # high wfhGroup no children under 18 \ No newline at end of file diff --git a/Database/tg/fortran/wfhmodule/chidist.csv b/Database/tg/fortran/wfhmodule/chidist.csv new file mode 100644 index 0000000..38cb0fa --- /dev/null +++ b/Database/tg/fortran/wfhmodule/chidist.csv @@ -0,0 +1,7 @@ +wfhgroup,hhchi2,pct +high,1,0.405 +high,2,0.595 +medium,1,0.354 +medium,2,0.646 +low,1,0.37 +low,2,0.63 diff --git a/Database/tg/fortran/wfhmodule/edudist.csv b/Database/tg/fortran/wfhmodule/edudist.csv new file mode 100644 index 0000000..92e43db --- /dev/null +++ b/Database/tg/fortran/wfhmodule/edudist.csv @@ -0,0 +1,7 @@ +wfhgroup,hhedu2,pct 
+high,1,0.202 +high,2,0.798 +medium,1,0.268 +medium,2,0.732 +low,1,0.305 +low,2,0.695 diff --git a/Database/tg/fortran/wfhmodule/incdist.csv b/Database/tg/fortran/wfhmodule/incdist.csv index 8ac6f97..501031d 100644 --- a/Database/tg/fortran/wfhmodule/incdist.csv +++ b/Database/tg/fortran/wfhmodule/incdist.csv @@ -1,9 +1,7 @@ -incdist,hhinc4,pct -high,1,0.038985164 -high,2,0.080435697 -high,3,0.156394004 -high,4,0.724185136 -low,1,0.09287371 -low,2,0.134953622 -low,3,0.238871815 -low,4,0.533300852 +wfhgroup,hhinc2,pct +high,1,0.231 +high,2,0.769 +medium,1,0.418 +medium,2,0.582 +low,1,0.353 +low,2,0.647 diff --git a/Database/tg/fortran/wfhmodule/indp_naics.csv b/Database/tg/fortran/wfhmodule/indp_naics.csv index abe0f56..e3933e3 100644 --- a/Database/tg/fortran/wfhmodule/indp_naics.csv +++ b/Database/tg/fortran/wfhmodule/indp_naics.csv @@ -1,295 +1,295 @@ -INDP,naics2 -170,11 -180,11 -190,11 -270,11 -280,11 -290,11 -370,21 -380,21 -390,21 -470,21 -490,21 -570,22 -580,22 -590,22 -670,22 -680,22 -690,22 -770,23 -1070,31-33 -1080,31-33 -1090,31-33 -1170,31-33 -1180,31-33 -1190,31-33 -1270,31-33 -1280,31-33 -1290,31-33 -1370,31-33 -1390,31-33 -1470,31-33 -1480,31-33 -1490,31-33 -1570,31-33 -1590,31-33 -1670,31-33 -1680,31-33 -1690,31-33 -1691,31-33 -1770,31-33 -1790,31-33 -1870,31-33 -1880,31-33 -1890,31-33 -1990,31-33 -2070,31-33 -2090,31-33 -2170,31-33 -2180,31-33 -2190,31-33 -2270,31-33 -2280,31-33 -2290,31-33 -2370,31-33 -2380,31-33 -2390,31-33 -2470,31-33 -2480,31-33 -2490,31-33 -2570,31-33 -2590,31-33 -2670,31-33 -2680,31-33 -2690,31-33 -2770,31-33 -2780,31-33 -2790,31-33 -2870,31-33 -2880,31-33 -2890,31-33 -2970,31-33 -2980,31-33 -2990,31-33 -3070,31-33 -3080,31-33 -3090,31-33 -3095,31-33 -3170,31-33 -3180,31-33 -3190,31-33 -3291,31-33 -3360,31-33 -3365,31-33 -3370,31-33 -3380,31-33 -3390,31-33 -3470,31-33 -3490,31-33 -3570,31-33 -3580,31-33 -3590,31-33 -3670,31-33 -3680,31-33 -3690,31-33 -3770,31-33 -3780,31-33 -3790,31-33 -3870,31-33 -3875,31-33 -3890,31-33 
-3895,31-33 -3960,31-33 -3970,31-33 -3980,31-33 -3990,31-33 -4070,42 -4080,42 -4090,42 -4170,42 -4180,42 -4190,42 -4195,42 -4260,42 -4265,42 -4270,42 -4280,42 -4290,42 -4370,42 -4380,42 -4390,42 -4470,42 -4480,42 -4490,42 -4560,42 -4570,42 -4580,42 -4585,42 -4590,42 -4670,44-45 -4680,44-45 -4690,44-45 -4770,44-45 -4780,44-45 -4790,44-45 -4795,44-45 -4870,44-45 -4880,44-45 -4890,44-45 -4970,44-45 -4971,44-45 -4972,44-45 -4980,44-45 -4990,44-45 -5070,44-45 -5080,44-45 -5090,44-45 -5170,44-45 -5180,44-45 -5190,44-45 -5270,44-45 -5275,44-45 -5280,44-45 -5290,44-45 -5295,44-45 -5370,44-45 -5380,44-45 -5381,44-45 -5390,44-45 -5391,44-45 -5470,44-45 -5480,44-45 -5490,44-45 -5570,44-45 -5580,44-45 -5590,44-45 -5591,44-45 -5592,44-45 -5593,44-45 -5670,44-45 -5680,44-45 -5690,44-45 -5790,44-45 -6070,48-49 -6080,48-49 -6090,48-49 -6170,48-49 -6180,48-49 -6190,48-49 -6270,48-49 -6280,48-49 -6290,48-49 -6370,48-49 -6380,48-49 -6390,48-49 -6470,51 -6480,51 -6490,51 -6570,51 -6590,51 -6670,51 -6672,51 -6680,51 -6690,51 -6695,51 -6770,51 -6780,51 -6870,52 -6880,52 -6890,52 -6970,52 -6990,52 -6991,52 -6992,52 -7070,53 -7071,53 -7072,53 -7080,53 -7170,53 -7180,53 -7181,53 -7190,53 -7270,54 -7280,54 -7290,54 -7370,54 -7380,54 -7390,54 -7460,54 -7470,54 -7480,54 -7490,54 -7570,55 -7580,56 -7590,56 -7670,56 -7680,56 -7690,56 -7770,56 -7780,56 -7790,56 -7860,61 -7870,61 -7880,61 -7890,61 -7970,62 -7980,62 -7990,62 -8070,62 -8080,62 -8090,62 -8170,62 -8180,62 -8190,62 -8191,62 -8192,62 -8270,62 -8290,62 -8370,62 -8380,62 -8390,62 -8470,62 -8560,71 -8561,71 -8562,71 -8563,71 -8564,71 -8570,71 -8580,71 -8590,71 -8660,72 -8670,72 -8680,72 -8690,72 -8770,81 -8780,81 -8790,81 -8870,81 -8880,81 -8891,81 -8970,81 -8980,81 -8990,81 -9070,81 -9080,81 -9090,81 -9160,81 -9170,81 -9180,81 -9190,81 -9290,81 -9370,92 -9380,92 -9390,92 -9470,92 -9480,92 -9490,92 -9570,92 -9590,92 -9670,92 -9680,92 -9690,92 -9770,92 -9780,92 -9790,92 -9870,92 +INDP,naics2,trc +170,11,low +180,11,low +190,11,low 
+270,11,low +280,11,low +290,11,low +370,21,low +380,21,low +390,21,low +470,21,low +490,21,low +570,22,medium +580,22,medium +590,22,medium +670,22,medium +680,22,medium +690,22,medium +770,23,low +1070,31-33,low +1080,31-33,low +1090,31-33,low +1170,31-33,low +1180,31-33,low +1190,31-33,low +1270,31-33,low +1280,31-33,low +1290,31-33,low +1370,31-33,low +1390,31-33,low +1470,31-33,low +1480,31-33,low +1490,31-33,low +1570,31-33,low +1590,31-33,low +1670,31-33,low +1680,31-33,low +1690,31-33,low +1691,31-33,low +1770,31-33,low +1790,31-33,low +1870,31-33,low +1880,31-33,low +1890,31-33,low +1990,31-33,low +2070,31-33,low +2090,31-33,low +2170,31-33,low +2180,31-33,low +2190,31-33,low +2270,31-33,low +2280,31-33,low +2290,31-33,low +2370,31-33,low +2380,31-33,low +2390,31-33,low +2470,31-33,low +2480,31-33,low +2490,31-33,low +2570,31-33,low +2590,31-33,low +2670,31-33,low +2680,31-33,low +2690,31-33,low +2770,31-33,low +2780,31-33,low +2790,31-33,low +2870,31-33,low +2880,31-33,low +2890,31-33,low +2970,31-33,low +2980,31-33,low +2990,31-33,low +3070,31-33,low +3080,31-33,low +3090,31-33,low +3095,31-33,low +3170,31-33,low +3180,31-33,low +3190,31-33,low +3291,31-33,low +3360,31-33,low +3365,31-33,low +3370,31-33,low +3380,31-33,low +3390,31-33,low +3470,31-33,low +3490,31-33,low +3570,31-33,low +3580,31-33,low +3590,31-33,low +3670,31-33,low +3680,31-33,low +3690,31-33,low +3770,31-33,low +3780,31-33,low +3790,31-33,low +3870,31-33,low +3875,31-33,low +3890,31-33,low +3895,31-33,low +3960,31-33,low +3970,31-33,low +3980,31-33,low +3990,31-33,low +4070,42,medium +4080,42,medium +4090,42,medium +4170,42,medium +4180,42,medium +4190,42,medium +4195,42,medium +4260,42,medium +4265,42,medium +4270,42,medium +4280,42,medium +4290,42,medium +4370,42,medium +4380,42,medium +4390,42,medium +4470,42,medium +4480,42,medium +4490,42,medium +4560,42,medium +4570,42,medium +4580,42,medium +4585,42,medium +4590,42,medium +4670,44-45,low +4680,44-45,low +4690,44-45,low 
+4770,44-45,low +4780,44-45,low +4790,44-45,low +4795,44-45,low +4870,44-45,low +4880,44-45,low +4890,44-45,low +4970,44-45,low +4971,44-45,low +4972,44-45,low +4980,44-45,low +4990,44-45,low +5070,44-45,low +5080,44-45,low +5090,44-45,low +5170,44-45,low +5180,44-45,low +5190,44-45,low +5270,44-45,low +5275,44-45,low +5280,44-45,low +5290,44-45,low +5295,44-45,low +5370,44-45,low +5380,44-45,low +5381,44-45,low +5390,44-45,low +5391,44-45,low +5470,44-45,low +5480,44-45,low +5490,44-45,low +5570,44-45,low +5580,44-45,low +5590,44-45,low +5591,44-45,low +5592,44-45,low +5593,44-45,low +5670,44-45,low +5680,44-45,low +5690,44-45,low +5790,44-45,low +6070,48-49,low +6080,48-49,low +6090,48-49,low +6170,48-49,low +6180,48-49,low +6190,48-49,low +6270,48-49,low +6280,48-49,low +6290,48-49,low +6370,48-49,low +6380,48-49,low +6390,48-49,low +6470,51,high +6480,51,high +6490,51,high +6570,51,high +6590,51,high +6670,51,high +6672,51,high +6680,51,high +6690,51,high +6695,51,high +6770,51,high +6780,51,high +6870,52,high +6880,52,high +6890,52,high +6970,52,high +6990,52,high +6991,52,high +6992,52,high +7070,53,high +7071,53,high +7072,53,high +7080,53,high +7170,53,high +7180,53,high +7181,53,high +7190,53,high +7270,54,high +7280,54,high +7290,54,high +7370,54,high +7380,54,high +7390,54,high +7460,54,high +7470,54,high +7480,54,high +7490,54,high +7570,55,low +7580,56,low +7590,56,low +7670,56,low +7680,56,low +7690,56,low +7770,56,low +7780,56,low +7790,56,low +7860,61,low +7870,61,low +7880,61,low +7890,61,low +7970,62,low +7980,62,low +7990,62,low +8070,62,low +8080,62,low +8090,62,low +8170,62,low +8180,62,low +8190,62,low +8191,62,low +8192,62,low +8270,62,low +8290,62,low +8370,62,low +8380,62,low +8390,62,low +8470,62,low +8560,71,medium +8561,71,medium +8562,71,medium +8563,71,medium +8564,71,medium +8570,71,medium +8580,71,medium +8590,71,medium +8660,72,low +8670,72,low +8680,72,low +8690,72,low +8770,81,low +8780,81,low +8790,81,low +8870,81,low 
+8880,81,low +8891,81,low +8970,81,low +8980,81,low +8990,81,low +9070,81,low +9080,81,low +9090,81,low +9160,81,low +9170,81,low +9180,81,low +9190,81,low +9290,81,low +9370,92,medium +9380,92,medium +9390,92,medium +9470,92,medium +9480,92,medium +9490,92,medium +9570,92,medium +9590,92,medium +9670,92,medium +9680,92,medium +9690,92,medium +9770,92,medium +9780,92,medium +9790,92,medium +9870,92,medium diff --git a/Database/tg/fortran/wfhmodule/wfhflag.py b/Database/tg/fortran/wfhmodule/wfhflag.py index 95a8f02..275fd0a 100644 --- a/Database/tg/fortran/wfhmodule/wfhflag.py +++ b/Database/tg/fortran/wfhmodule/wfhflag.py @@ -6,6 +6,12 @@ import sys import os +# Current trip_gen.bat +# python wfhflag.py %filedir% %savedir% %wfhFile% %wfh% %tc14% + +# Adj trip_gen.bat +# python wfhflag.py %filedir% %savedir% %wfhFile% %wfhl% %wfhm% %wfhh% + # pathways savedir = sys.argv[2] assert os.path.exists(savedir) == True, "savedir not valid" @@ -13,328 +19,96 @@ synpoppath = "synthetic_persons.zip" synhhpath = "synthetic_households.zip" popsynhhpath = savedir + "/POPSYN_HH.csv" -incdistpath = "incdist.csv" -indmixpath = "indusmix.csv" indpxwalkpath = "indp_naics.csv" +#telework worker distribution by income, edu level and children +incdistpath = "incdist.csv" +edudistpath = "edudist.csv" +chidistpath = "chidist.csv" + # save additional output files? savefiles = sys.argv[3] # major parameters - source: mdt + nirpc survey (which is higher than PUMS data...) 
# percent of all workers -usualwfhpct = float(sys.argv[4]) -tc14pct = float(sys.argv[5]) -# set seedvalue -seedvalue = 2 -np.random.seed(seed=seedvalue) +''' This data will be separated from batch_file.yaml to Telework.yaml including Tuesday-Thursday telework rates''' -# place industries in one of two income distributions types -# high is based on industries that had >60% of workers in income group 4 -# low is based on other industries -lowlist = ['11', '21', '44-45', '48-49', '56', '61', '62', '71', '72', '81'] -highlist = ['22', '23', '31-33', '42', '51', '52', '53', '54', '55', '92'] +wfhl = float(sys.argv[4]) +wfhm = float(sys.argv[5]) +wfhh = float(sys.argv[6]) -# setup education weights for usualwfh (low to high) -eduw = [0.177, 0.246, 0.576] -# setup education weights for usualwfh (low to high, fine they don't sum to 1) -eduwtc = [0.049, 0.114, 0.720] +wfhpctlist = [wfhl, wfhm, wfhh] -# set distribution of tc14 into the 4 days -tcportions = [0.575, 0.254, 0.112, 0.059] +# set seedvalue +seedvalue = 2 +np.random.seed(seed=seedvalue) -print('setting up for wfhflag script...') # read in files dfpop = pd.read_csv(synpoppath) dfhh = pd.read_csv(synhhpath, dtype={'MV': object}) indpxwalk = pd.read_csv(indpxwalkpath) -indmix = pd.read_csv(indmixpath) -incdist = pd.read_csv(incdistpath) -# industry mix - source: mdt + nirpc survey (rescaled after removing industries -7, -8, and 97 to sum to 1) -indmix1 = indmix[indmix['cat'] == 'usualwfh'][['indus', 'pct']] -indmix2 = indmix[indmix['cat'] == 'tc14'][['indus', 'pct']] +# Need to change if plan to get data from Telework.yaml +incdist = pd.read_csv(incdistpath) +edudist = pd.read_csv(edudistpath) +chidist = pd.read_csv(chidistpath) -# merge naics2, income4, edu, get workers +# merge income2, edu2, trc get workers dfpop = dfpop.merge(indpxwalk, on='INDP', how='left') dfpop = dfpop.merge(dfhh[['household_id', 'HINCP19']], on='household_id', how='left') -dfpop['inccat4'] = pd.cut(dfpop.HINCP19, bins=[-99999, 30000, 60000, 
100000, 2000000], right=False, labels=[1, 2, 3, 4]) + +dfpop['inccat2'] = pd.cut(dfpop.HINCP19, bins=[-99999, 100000, 2000000], right=False, labels=[1, 2]) workers = dfpop[dfpop['JWTR'] != 'bb'].copy() workers.JWTR = workers.JWTR.astype(float).astype(int) workers.SCHL = workers.SCHL.astype(float).astype(int) -workers.loc[workers.SCHL.isin([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 'edu'] = 1 -workers.loc[workers.SCHL.isin([18, 19, 20]), 'edu'] = 2 -workers.loc[workers.SCHL.isin([21, 22, 23, 24]), 'edu'] = 3 -workers.loc[:, 'selected'] = 0 -# overall targets -targetwfh = round(len(workers) * usualwfhpct, 0) -targettc14 = round(len(workers) * tc14pct, 0) +workers.loc[workers.SCHL.isin([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 'edu'] = 1 +workers.loc[workers.SCHL.isin([21, 22, 23, 24]), 'edu'] = 2 +workers.loc[:, 'selected'] = 0 workers = workers[workers['ESR'] != '4'] -# functions -def portionout(df, pctcol, total, colname): - """ - divides up a total based on a percent column into a new column (no decimals) - :param df: input dataframe - :param pctcol: name of column that has percentages - :param total: can be name of column that has totals, or a numeric total that the percentages will be applied to - :param colname: the name of the result column - :return: dataframe with targetdec, targetint, decimal, and colname columns - """ - df = df.copy() - indcounts = workers[workers.selected == 0].groupby('naics2').agg({'household_id':'count'}).reset_index() - indcounts.rename({'household_id':'sectortotal'}, axis=1, inplace=True) - - - if isinstance(total, int) or isinstance(total, float): - df['targetdec'] = df[pctcol] * total - - df2 = df.merge(indcounts, left_on='indus', right_on='naics2') - df2['testtot'] = df2['sectortotal'] - df2['targetdec'] - - if len(df2[df2.testtot < 0]) == 0: - pass - else: - # don't go all the way up to max to prevent same issue further down the line - df2.loc[df2.testtot < 0, 'newpct'] = 
(df2.sectortotal - 10) / total - df2.loc[df2.testtot < 0, 'pctdiff'] = df2[pctcol] - df2.newpct - todistribute = df2.pctdiff.sum() - # normalize unchanged pcts to one to distribute out the missing pct - newdenom = 1 - df2.loc[df2.testtot < 0][pctcol].sum() - df2.loc[df2.testtot >=0, 'pctfordist'] = df2[pctcol] / newdenom - df2.loc[df2.testtot >= 0, 'newpct'] = (df2.pctfordist * todistribute) + df2[pctcol] - df2[pctcol] = df2['newpct'] - df2['targetdec'] = df2[pctcol] * total - - df2['targetint'] = df2['targetdec'].astype('int') - df2['decimal'] = df2['targetdec'] - df2['targetint'] - leftover = total - df2['targetint'].sum() - df2.sort_values('decimal', ascending=False, inplace=True) - df2.reset_index(drop=True, inplace=True) - df2.loc[df2.index < leftover, 'targetint'] = df2.targetint + 1 - assert (total - df2.targetint.sum()) == 0, "Targets do not sum to total" - df2.rename({'targetint': colname}, axis=1, inplace=True) - return df2 - - elif isinstance(total, str): - df['targetdec'] = df[pctcol] * df[total] - - df2 = df.merge(indcounts, left_on='indus', right_on='naics2') - df2['testtot'] = df2['sectortotal'] - df2['targetdec'] - - if len(df2[df2.testtot < 0]) == 0: - pass - else: - df2.loc[df2.testtot < 0, 'newpct'] = (df2.sectortotal - 10) / df[total] - df2.loc[df2.testtot < 0, 'pctdiff'] = df2[pctcol] - df2.newpct - todistribute = df2.pctdiff.sum() - df2.loc[df2.testtot >= 0, 'newpct'] = df2[pctcol] * (1 + todistribute) - df2[pctcol] = df2['newpct'] - df2['targetdec'] = df2[pctcol] * df2[total] - - df2['targetint'] = df2['targetdec'].astype('int') - df2['decimal'] = df2['targetdec'] - df2['targetint'] +# telework distribution +inc_values = incdist.iloc[:, 2].astype(float).tolist() +edu_values = edudist.iloc[:, 2].astype(float).tolist() - totallist = [x for x in df2[total].unique()] - piecelist = [] - for i in totallist: - selection = df2[df2[total] == i].copy() - leftover = i - selection['targetint'].sum() - if leftover != 0: - selection.sort_values('decimal', 
ascending=False, inplace=True) - selection.reset_index(drop=True, inplace=True) - selection.loc[selection.index < leftover, 'targetint'] = selection.targetint + 1 - assert (i - selection.targetint.sum()) == 0, "Targets do not sum to total" - piecelist.append(selection) - df2 = pd.concat(piecelist) - df2.rename({'targetint': colname}, axis=1, inplace=True) - return df2 +# organize into dicts for easier access +incdist_dict = { + "high": {1: inc_values[0], 2: inc_values[1]}, + "medium": {1: inc_values[2], 2: inc_values[3]}, + "low": {1: inc_values[4], 2: inc_values[5]}, +} +edudist_dict = { + "high": {1: edu_values[0], 2: edu_values[1]}, + "medium": {1: edu_values[2], 2: edu_values[3]}, + "low": {1: edu_values[4], 2: edu_values[5]}, +} -def settargets(indmix, overalltarget): - """ - use portionout function to set industry/income targets for group of interest (usualwfh or tc14) - :param indmix: input df with distribution of people in group of interest into industries - :param overalltarget: total target value for people in group of interest - :return: - """ - indmixfinal = portionout(indmix, 'pct', overalltarget, 'indint') - - # income targets - indmixfinal.loc[indmixfinal['indus'].isin(lowlist), 'incdist'] = 'low' - indmixfinal.loc[indmixfinal['indus'].isin(highlist), 'incdist'] = 'high' - targets = indmixfinal[['indus', 'pct', 'indint', 'incdist']].merge(incdist, on='incdist', how='left') - finaltargets = portionout(targets, 'pct_y', 'indint', 'indincint') - finaltargets.reset_index(drop=True, inplace=True) - - return finaltargets +def samplingworkers(df, wfhpctlist): + df = df.copy() - -def jwtr11flag(targetdf, workerdf, flagvalue): - """ - flag workers in workerdf with jwtr11 with the flagvalue according to counts in targetdf - :param targetdf: guiding dataframe with targets values for each category - :param workerdf: worker df with selected flag column - :param flagvalue: the value to set the selected column to - :return:workerdf - """ - for i, row in 
targetdf.iterrows(): - ind = row['indus'] - inc4 = row['hhinc4'] - target = row['indincint'] - options = workerdf[(workerdf['naics2'] == ind) & (workerdf['selected'] == 0) & ( - workerdf['inccat4'] == inc4) & (workerdf['JWTR'] == 11)] - if len(options) > target: - ilist = [x for x in options.sample(target, random_state=seedvalue).index.values] - workerdf.loc[workerdf.index.isin(ilist), 'selected'] = flagvalue - else: - ilist2 = [x for x in options.index.values] - workerdf.loc[workerdf.index.isin(ilist2), 'selected'] = flagvalue - - return workerdf - - - -def preproundtwo(workerdf, flagvalue, eduweights, targets): - """ - prepare for roundtwo assignment by calculating numbers still needed and adding eduwgt column - :param workerdf: worker df with selected flag column - :param flagvalue: value in the 'selected' column to evaluate - :param eduweights: weights to apply based on edu field for use in sampling - :return: b, workerdf. b is the new guiding df that lists how many are still needed in each category - """ - result = workerdf[workerdf['selected'] == flagvalue].groupby(['naics2', 'inccat4']). 
\ - household_id.count().reset_index() - - # calculate number still needed in each category - comparison = targets.merge(result, left_on=['indus', 'hhinc4'], - right_on=['naics2', 'inccat4'], how='left').reset_index() - comparison.household_id.fillna(0, inplace=True) - comparison['needed'] = comparison['indincint'] - comparison['household_id'] - b = comparison[comparison['needed'] > 0] - workerdf = workerdf.merge(comparison[['indus', 'hhinc4', 'needed']], - left_on=['naics2', 'inccat4'], right_on=['indus', 'hhinc4'], how='left') - workerdf.loc[workerdf['edu'] == 1, 'eduwgt'] = eduweights[0] - workerdf.loc[workerdf['edu'] == 2, 'eduwgt'] = eduweights[1] - workerdf.loc[workerdf['edu'] == 3, 'eduwgt'] = eduweights[2] - - return b, workerdf - - -def roundtwo(dfb, workerdf, flagvalue): - """ - finish flagging workers to meet target values (outside of jwtr == 11) using eduwgt column - :param dfb: guiding dataframe with targets values for each category - :param workerdf: worker df with selected flag column - :param flagvalue: value to set 'selected' column to - :return: workerdf - """ - dolater = {} - - for i, row in dfb.iterrows(): - ind = row['indus'] - inc4 = row['hhinc4'] - target = int(row['needed']) - options = workerdf[(workerdf['naics2'] == ind) & (workerdf['selected'] == 0) & ( - workerdf['inccat4'] == inc4)] - - if len(options) >= target: - ilist = [x for x in options.sample(target, weights='eduwgt', random_state=seedvalue).index.values] - workerdf.loc[workerdf.index.isin(ilist), 'selected'] = flagvalue - elif len(options) < target: - dolater[i] = (ind,inc4,target) - + for idx, trc in enumerate(["low", "medium", "high"]): + filtered_df = df[df["trc"] == trc] + targetgrouptotal = int(len(filtered_df) * wfhpctlist[idx]) - if len(dolater) > 0: - donow = pd.DataFrame.from_dict(dolater, orient='index', columns=['ind','inc','target']) - for i,row in donow.iterrows(): - ind = row['ind'] - inc4 = row['inc'] - target = int(row['target']) - options = 
workerdf[(workerdf['naics2'] == ind) & (workerdf['selected'] == 0) & ( - workerdf['inccat4'] == inc4)] - ilist = [x for x in options.index.values] - stillneed = target - len(ilist) - options2 = workerdf[(workerdf['naics2'] == ind) & (workerdf['selected'] == 0) & (workerdf.inccat4 != inc4)] - ilist.extend([x for x in options2.sample(stillneed).index.values]) - - workerdf.loc[workerdf.index.isin(ilist), 'selected'] = flagvalue - - return workerdf - - -########################################## -# part one: usualwfh -########################################## - -# industry targets -finaltargets1 = settargets(indmix1, targetwfh) - -print('selecting usualwfh from JWTR 11...') -# round one -workers = jwtr11flag(finaltargets1, workers, 1) - -# round two -b1, workers = preproundtwo(workers, 1, eduw, finaltargets1) - -print('selecting usualwfh from any...') -workers = roundtwo(b1, workers, 1) - -current = workers.selected.sum() -remaining = int(targetwfh - current) -print('checking overall usualwfh target met...') -assert remaining == 0, "overall usualwfh target not met" -workers = workers.drop(['indus', 'hhinc4', 'needed', 'eduwgt'], axis=1) - -########################################## -# part two: tc14 -########################################## - -# industry targets -finaltargets2 = settargets(indmix2, targettc14) - -print('selecting tc14 from JWTR 11...') -# round one -workers = jwtr11flag(finaltargets2, workers, 2) - -# round two -b2, workers = preproundtwo(workers, 2, eduwtc, finaltargets2) - -print('selecting tc14 from any...') -workers = roundtwo(b2, workers, 2) - -current = len(workers[workers['selected'] == 2]) -remaining = int(targettc14 - current) -print('checking overall tc14 target met...') -assert remaining == 0, "overall tc14 target not met" - -########################################## -# assign tc14 wfh status for given day -########################################## -print('assigning wfh status for tc14...') -workers['tc'] = 0 - -tc1 = targettc14 * 
tcportions[0] -tc2 = targettc14 * tcportions[1] -tc3 = targettc14 * tcportions[2] -tc4 = targettc14 * tcportions[3] - -count = 4 -for t in [tc4, tc3, tc2]: - options = workers[(workers['selected'] == 2) & (workers['tc'] == 0)] - it = int(t) - ilist = [x for x in options.sample(it, random_state=seedvalue).index.values] - workers.loc[workers.index.isin(ilist), 'tc'] = count - count -= 1 + for i in [1, 2]: + for e in [1, 2]: + inc_factor = incdist_dict[trc][i] + edu_factor = edudist_dict[trc][e] + targetwfhworkers = int(targetgrouptotal * inc_factor * edu_factor) + + sub_filtered_df = filtered_df[(filtered_df["inccat2"] == i) & (filtered_df["edu"] == e)] + if targetwfhworkers > 0 and len(sub_filtered_df) >= targetwfhworkers: + sample_idx = sub_filtered_df.sample(n=targetwfhworkers, random_state=2).index + df.loc[sample_idx, "selected"] = 1 # mark selected rows + + return df -workers.loc[(workers['selected'] == 2) & (workers['tc'] == 0), 'tc'] = 1 -workers['random'] = np.random.random(size=len(workers)) -workers.loc[(workers['selected'] == 2) & (workers['random'] < (workers['tc'] / 5)), 'working'] = 1 -workers.working.fillna(0, inplace=True) +workers = samplingworkers(workers, wfhpctlist) ########################################## # format and save @@ -343,18 +117,10 @@ def roundtwo(dfb, workerdf, flagvalue): popsynhh = pd.read_csv(popsynhhpath, names=['sz', 'hhtype', 'vehicles', 'serialno', 'stpuma5', 'rowcol', 'adults', 'workers', 'children', 'iq', 'age', 'hhvtype', 'income']) -workers.loc[workers['selected'] == 1, 'usualwfh'] = 1 -workers.loc[(workers['selected'] == 2) & (workers['working'] == 1), 'tc14'] = 1 -workers.loc[(workers['selected'] == 2) & (workers['working'] == 0), 'tc14nw'] = 1 -workers.usualwfh.fillna(0, inplace=True) -workers.tc14.fillna(0, inplace=True) -workers.tc14nw.fillna(0, inplace=True) -hhsummary = workers.groupby('household_id').agg({'usualwfh':'sum','tc14':'sum','tc14nw':'max'}).reset_index() -hhsummary['wfhworkers'] = hhsummary['tc14'] + 
hhsummary['usualwfh'] -hhsummary.loc[(hhsummary['tc14'] > 0) & (hhsummary['usualwfh'] > 0), 'finalflag'] = 1 -hhsummary.loc[(hhsummary['tc14'] > 0) & (hhsummary['usualwfh'] == 0), 'finalflag'] = 2 -hhsummary.loc[(hhsummary['tc14'] == 0) & (hhsummary['usualwfh'] > 0), 'finalflag'] = 1 -final1 = dfhh[['household_id','SERIALNO']].merge(hhsummary[['household_id','finalflag','wfhworkers','tc14nw']],on='household_id', how='left') + +hhsummary = workers.groupby("household_id").agg(finalflag=("selected", lambda x: int(x.sum() > 0)),wfhworkers=("selected", "sum")).reset_index() + +final1 = dfhh[['household_id','SERIALNO']].merge(hhsummary[['household_id','finalflag','wfhworkers']],on='household_id', how='left') final1.finalflag.fillna(0, inplace=True) try: final1['SERIALNO'] = final1['SERIALNO'].str.replace('HU','99') @@ -371,11 +137,9 @@ def roundtwo(dfb, workerdf, flagvalue): final1sort['diffcheck'] = final1sort['SERIALNO'] - final1sort['sn2'] assert (final1sort.diffcheck == 0).all(), "file not aligned with popsyn_hh" final1sort.wfhworkers.fillna(0, inplace=True) -final1sort.tc14nw.fillna(0, inplace=True) final1sort['finalflag'] = final1sort.finalflag.astype('int') final1sort['wfhworkers'] = final1sort.wfhworkers.astype('int') -final1sort['tc14nw'] = final1sort.tc14nw.astype('int') -final1sort[['SERIALNO', 'finalflag','wfhworkers','tc14nw']].to_csv(savedir + "/HH_WFH_STATUS.CSV", index=False, header=False) +final1sort[['SERIALNO', 'finalflag','wfhworkers']].to_csv(savedir + "/HH_WFH_STATUS.CSV", index=False, header=False) ########################################## # save additional files @@ -383,6 +147,4 @@ def roundtwo(dfb, workerdf, flagvalue): if savefiles == "Y": print('saving additional files...') - workers.to_csv("workers.csv", index=False) - finaltargets1.to_csv("finaltargets_usualwfh.csv", index=False) - finaltargets2.to_csv("finaltargets_tc14.csv", index=False) \ No newline at end of file + workers.to_csv("workers.csv", index=False) \ No newline at end of file 
diff --git a/Database/tg/fortran/wfhmodule/wfhflag_old.py b/Database/tg/fortran/wfhmodule/wfhflag_old.py
new file mode 100644
index 0000000..95a8f02
--- /dev/null
+++ b/Database/tg/fortran/wfhmodule/wfhflag_old.py
@@ -0,0 +1,408 @@
+# coding=utf-8
+
+# this flags tbm people as usualwfh (selected == 1) or tc14 (selected == 2)
+# and writes one row per household to <savedir>/HH_WFH_STATUS.CSV
+#
+# command line arguments:
+#   1: directory holding the input files (becomes the working directory)
+#   2: existing directory holding POPSYN_HH.csv; also receives the output
+#   3: "Y" to also save workers.csv and the finaltargets_*.csv files
+#   4: usualwfh share of all workers
+#   5: tc14 share of all workers
+import pandas as pd
+import numpy as np
+import sys
+import os
+
+# pathways
+savedir = sys.argv[2]
+assert os.path.exists(savedir), "savedir not valid"
+os.chdir(sys.argv[1])
+synpoppath = "synthetic_persons.zip"
+synhhpath = "synthetic_households.zip"
+popsynhhpath = savedir + "/POPSYN_HH.csv"
+incdistpath = "incdist.csv"
+indmixpath = "indusmix.csv"
+indpxwalkpath = "indp_naics.csv"
+
+# save additional output files?
+savefiles = sys.argv[3]
+
+# major parameters - source: mdt + nirpc survey (which is higher than PUMS data...)
+# percent of all workers
+usualwfhpct = float(sys.argv[4])
+tc14pct = float(sys.argv[5])
+
+# set seedvalue
+seedvalue = 2
+np.random.seed(seed=seedvalue)
+
+# place industries in one of two income distributions types
+# high is based on industries that had >60% of workers in income group 4
+# low is based on other industries
+lowlist = ['11', '21', '44-45', '48-49', '56', '61', '62', '71', '72', '81']
+highlist = ['22', '23', '31-33', '42', '51', '52', '53', '54', '55', '92']
+
+# setup education weights for usualwfh (low to high)
+eduw = [0.177, 0.246, 0.576]
+# setup education weights for tc14 (low to high, fine they don't sum to 1)
+eduwtc = [0.049, 0.114, 0.720]
+
+# set distribution of tc14 into the 4 days
+tcportions = [0.575, 0.254, 0.112, 0.059]
+
+# legacy four-bin income distribution (columns incdist, hhinc4, pct), used when
+# incdist.csv does not carry those columns
+legacyincdist = pd.DataFrame({
+    'incdist': ['high'] * 4 + ['low'] * 4,
+    'hhinc4': [1, 2, 3, 4, 1, 2, 3, 4],
+    'pct': [0.038985164, 0.080435697, 0.156394004, 0.724185136,
+            0.09287371, 0.134953622, 0.238871815, 0.533300852],
+})
+
+print('setting up for wfhflag script...')
+# read in files
+dfpop = pd.read_csv(synpoppath)
+dfhh = pd.read_csv(synhhpath, dtype={'MV': object})
+indpxwalk = pd.read_csv(indpxwalkpath)
+indmix = pd.read_csv(indmixpath)
+incdist = pd.read_csv(incdistpath)
+# settargets() merges on 'incdist' and the flagging loops read 'hhinc4', so a
+# table with other columns (e.g. wfhgroup, hhinc2, pct) cannot be used here
+if not {'incdist', 'hhinc4', 'pct'}.issubset(incdist.columns):
+    print('incdist.csv lacks incdist/hhinc4/pct columns, using legacy distribution...')
+    incdist = legacyincdist
+
+# industry mix - source: mdt + nirpc survey (rescaled after removing industries -7, -8, and 97 to sum to 1)
+indmix1 = indmix[indmix['cat'] == 'usualwfh'][['indus', 'pct']]
+indmix2 = indmix[indmix['cat'] == 'tc14'][['indus', 'pct']]
+
+# merge naics2, income4, edu, get workers
+dfpop = dfpop.merge(indpxwalk, on='INDP', how='left')
+dfpop = dfpop.merge(dfhh[['household_id', 'HINCP19']], on='household_id', how='left')
+dfpop['inccat4'] = pd.cut(dfpop.HINCP19, bins=[-99999, 30000, 60000, 100000, 2000000], right=False, labels=[1, 2, 3, 4])
+
+# rows with JWTR 'bb' are dropped before JWTR and SCHL are cast to integers
+workers = dfpop[dfpop['JWTR'] != 'bb'].copy()
+workers['JWTR'] = workers['JWTR'].astype(float).astype(int)
+workers['SCHL'] = workers['SCHL'].astype(float).astype(int)
+workers.loc[workers.SCHL.isin(list(range(1, 18))), 'edu'] = 1
+workers.loc[workers.SCHL.isin([18, 19, 20]), 'edu'] = 2
+workers.loc[workers.SCHL.isin([21, 22, 23, 24]), 'edu'] = 3
+workers['selected'] = 0
+
+# overall targets (taken before the ESR filter on the next statement)
+targetwfh = round(len(workers) * usualwfhpct, 0)
+targettc14 = round(len(workers) * tc14pct, 0)
+
+workers = workers[workers['ESR'] != '4'].copy()
+
+# functions
+def portionout(df, pctcol, total, colname):
+    """
+    divides up a total based on a percent column into a new column (no decimals)
+    targets are checked against the unselected rows per naics2 in the module-level workers dataframe
+    :param df: input dataframe; needs an 'indus' column besides pctcol
+    :param pctcol: name of column that has percentages
+    :param total: can be name of column that has totals, or a numeric total that the percentages will be applied to
+    :param colname: the name of the result column
+    :return: dataframe with targetdec, decimal, and colname columns (None if total is neither numeric nor str)
+    """
+    df = df.copy()
+    # unselected workers available in each industry
+    indcounts = workers[workers.selected == 0].groupby('naics2').agg({'household_id': 'count'}).reset_index()
+    indcounts.rename({'household_id': 'sectortotal'}, axis=1, inplace=True)
+
+    if isinstance(total, (int, float, np.integer, np.floating)):
+        df['targetdec'] = df[pctcol] * total
+
+        df2 = df.merge(indcounts, left_on='indus', right_on='naics2')
+        df2['testtot'] = df2['sectortotal'] - df2['targetdec']
+        short = df2.testtot < 0
+        enough = df2.testtot >= 0
+
+        if short.any():
+            # don't go all the way up to max to prevent same issue further down the line
+            df2.loc[short, 'newpct'] = (df2.sectortotal - 10) / total
+            df2.loc[short, 'pctdiff'] = df2[pctcol] - df2.newpct
+            todistribute = df2.pctdiff.sum()
+            # normalize unchanged pcts to one to distribute out the missing pct
+            newdenom = 1 - df2.loc[short, pctcol].sum()
+            df2.loc[enough, 'pctfordist'] = df2[pctcol] / newdenom
+            df2.loc[enough, 'newpct'] = (df2.pctfordist * todistribute) + df2[pctcol]
+            df2[pctcol] = df2['newpct']
+            df2['targetdec'] = df2[pctcol] * total
+
+        df2['targetint'] = df2['targetdec'].astype('int')
+        df2['decimal'] = df2['targetdec'] - df2['targetint']
+        # units lost to truncation go to the rows with the largest decimal part
+        leftover = total - df2['targetint'].sum()
+        df2.sort_values('decimal', ascending=False, inplace=True)
+        df2.reset_index(drop=True, inplace=True)
+        df2.loc[df2.index < leftover, 'targetint'] = df2.targetint + 1
+        assert (total - df2.targetint.sum()) == 0, "Targets do not sum to total"
+        df2.rename({'targetint': colname}, axis=1, inplace=True)
+        return df2
+
+    elif isinstance(total, str):
+        df['targetdec'] = df[pctcol] * df[total]
+
+        df2 = df.merge(indcounts, left_on='indus', right_on='naics2')
+        df2['testtot'] = df2['sectortotal'] - df2['targetdec']
+        short = df2.testtot < 0
+
+        if short.any():
+            # divide by df2's own totals: the merge renumbers rows, so df[total] need not line up with df2
+            df2.loc[short, 'newpct'] = (df2.sectortotal - 10) / df2[total]
+            df2.loc[short, 'pctdiff'] = df2[pctcol] - df2.newpct
+            todistribute = df2.pctdiff.sum()
+            df2.loc[df2.testtot >= 0, 'newpct'] = df2[pctcol] * (1 + todistribute)
+            df2[pctcol] = df2['newpct']
+            df2['targetdec'] = df2[pctcol] * df2[total]
+
+        df2['targetint'] = df2['targetdec'].astype('int')
+        df2['decimal'] = df2['targetdec'] - df2['targetint']
+
+        # round within each industry/total pair; grouping on the total alone would pool
+        # industries that happen to share the same total and break the sum check
+        piecelist = []
+        for (_, grouptotal), selection in df2.groupby(['indus', total], sort=False):
+            selection = selection.copy()
+            leftover = grouptotal - selection['targetint'].sum()
+            if leftover != 0:
+                selection.sort_values('decimal', ascending=False, inplace=True)
+                selection.reset_index(drop=True, inplace=True)
+                selection.loc[selection.index < leftover, 'targetint'] = selection.targetint + 1
+            assert (grouptotal - selection.targetint.sum()) == 0, "Targets do not sum to total"
+            piecelist.append(selection)
+        df2 = pd.concat(piecelist)
+        df2.rename({'targetint': colname}, axis=1, inplace=True)
+        return df2
+
+
+def settargets(indmix, overalltarget):
+    """
+    use portionout function to set industry/income targets for group of interest (usualwfh or tc14)
+    :param indmix: input df with distribution of people in group of interest into industries
+    :param overalltarget: total target value for people in group of interest
+    :return: dataframe with one row per industry/income group; indincint holds the target count
+    """
+    indmixfinal = portionout(indmix, 'pct', overalltarget, 'indint')
+
+    # income targets
+    indmixfinal.loc[indmixfinal['indus'].isin(lowlist), 'incdist'] = 'low'
+    indmixfinal.loc[indmixfinal['indus'].isin(highlist), 'incdist'] = 'high'
+    # both frames carry a 'pct' column, so the income share arrives as pct_y
+    targets = indmixfinal[['indus', 'pct', 'indint', 'incdist']].merge(incdist, on='incdist', how='left')
+    finaltargets = portionout(targets, 'pct_y', 'indint', 'indincint')
+    finaltargets.reset_index(drop=True, inplace=True)
+
+    return finaltargets
+
+
+def jwtr11flag(targetdf, workerdf, flagvalue):
+    """
+    flag workers in workerdf with jwtr11 with the flagvalue according to counts in targetdf
+    :param targetdf: guiding dataframe with targets values for each category
+    :param workerdf: worker df with selected flag column
+    :param flagvalue: the value to set the selected column to
+    :return: workerdf
+    """
+    for i, row in targetdf.iterrows():
+        ind = row['indus']
+        inc4 = row['hhinc4']
+        target = int(row['indincint'])
+        options = workerdf[(workerdf['naics2'] == ind) & (workerdf['selected'] == 0) & (
+            workerdf['inccat4'] == inc4) & (workerdf['JWTR'] == 11)]
+        # more candidates than needed: draw a sample; otherwise flag every candidate
+        if len(options) > target:
+            ilist = options.sample(target, random_state=seedvalue).index
+        else:
+            ilist = options.index
+        workerdf.loc[workerdf.index.isin(ilist), 'selected'] = flagvalue
+
+    return workerdf
+
+
+def preproundtwo(workerdf, flagvalue, eduweights, targets):
+    """
+    prepare for roundtwo assignment by calculating numbers still needed and adding eduwgt column
+    :param workerdf: worker df with selected flag column
+    :param flagvalue: value in the 'selected' column to evaluate
+    :param eduweights: weights to apply based on edu field for use in sampling
+    :param targets: dataframe from settargets with indus, hhinc4 and indincint columns
+    :return: b, workerdf. b is the new guiding df that lists how many are still needed in each category
+    """
+    result = workerdf[workerdf['selected'] == flagvalue].groupby(['naics2', 'inccat4']). \
+        household_id.count().reset_index()
+
+    # calculate number still needed in each category
+    comparison = targets.merge(result, left_on=['indus', 'hhinc4'],
+                               right_on=['naics2', 'inccat4'], how='left').reset_index()
+    # assign the result back; fillna(inplace=True) on a column accessor may act on a copy
+    comparison['household_id'] = comparison['household_id'].fillna(0)
+    comparison['needed'] = comparison['indincint'] - comparison['household_id']
+    b = comparison[comparison['needed'] > 0]
+    workerdf = workerdf.merge(comparison[['indus', 'hhinc4', 'needed']],
+                              left_on=['naics2', 'inccat4'], right_on=['indus', 'hhinc4'], how='left')
+    workerdf.loc[workerdf['edu'] == 1, 'eduwgt'] = eduweights[0]
+    workerdf.loc[workerdf['edu'] == 2, 'eduwgt'] = eduweights[1]
+    workerdf.loc[workerdf['edu'] == 3, 'eduwgt'] = eduweights[2]
+
+    return b, workerdf
+
+
+def roundtwo(dfb, workerdf, flagvalue):
+    """
+    finish flagging workers to meet target values (outside of jwtr == 11) using eduwgt column
+    :param dfb: guiding dataframe with targets values for each category
+    :param workerdf: worker df with selected flag column
+    :param flagvalue: value to set 'selected' column to
+    :return: workerdf
+    """
+    # categories with too few unselected workers, filled from other income groups afterwards
+    dolater = []
+
+    for i, row in dfb.iterrows():
+        ind = row['indus']
+        inc4 = row['hhinc4']
+        target = int(row['needed'])
+        options = workerdf[(workerdf['naics2'] == ind) & (workerdf['selected'] == 0) & (
+            workerdf['inccat4'] == inc4)]
+
+        if len(options) >= target:
+            ilist = options.sample(target, weights='eduwgt', random_state=seedvalue).index
+            workerdf.loc[workerdf.index.isin(ilist), 'selected'] = flagvalue
+        else:
+            dolater.append((ind, inc4, target))
+
+    for ind, inc4, target in dolater:
+        options = workerdf[(workerdf['naics2'] == ind) & (workerdf['selected'] == 0) & (
+            workerdf['inccat4'] == inc4)]
+        ilist = list(options.index)
+        stillneed = target - len(ilist)
+        options2 = workerdf[(workerdf['naics2'] == ind) & (workerdf['selected'] == 0) & (workerdf.inccat4 != inc4)]
+        # drawn without random_state, unlike the other samples in this script
+        ilist.extend(options2.sample(stillneed).index)
+
+        workerdf.loc[workerdf.index.isin(ilist), 'selected'] = flagvalue
+
+    return workerdf
+
+
+##########################################
+# part one: usualwfh
+##########################################
+
+# industry targets
+finaltargets1 = settargets(indmix1, targetwfh)
+
+print('selecting usualwfh from JWTR 11...')
+# round one
+workers = jwtr11flag(finaltargets1, workers, 1)
+
+# round two
+b1, workers = preproundtwo(workers, 1, eduw, finaltargets1)
+
+print('selecting usualwfh from any...')
+workers = roundtwo(b1, workers, 1)
+
+current = workers.selected.sum()
+remaining = int(targetwfh - current)
+print('checking overall usualwfh target met...')
+assert remaining == 0, "overall usualwfh target not met"
+# drop the helper columns so preproundtwo can add them again for tc14
+workers = workers.drop(['indus', 'hhinc4', 'needed', 'eduwgt'], axis=1)
+
+##########################################
+# part two: tc14
+##########################################
+
+# industry targets
+finaltargets2 = settargets(indmix2, targettc14)
+
+print('selecting tc14 from JWTR 11...')
+# round one
+workers = jwtr11flag(finaltargets2, workers, 2)
+
+# round two
+b2, workers = preproundtwo(workers, 2, eduwtc, finaltargets2)
+
+print('selecting tc14 from any...')
+workers = roundtwo(b2, workers, 2)
+
+current = len(workers[workers['selected'] == 2])
+remaining = int(targettc14 - current)
+print('checking overall tc14 target met...')
+assert remaining == 0, "overall tc14 target not met"
+
+##########################################
+# assign tc14 wfh status for given day
+##########################################
+print('assigning wfh status for tc14...')
+workers['tc'] = 0
+
+# tc holds the value 1-4 for tc14 workers: 4, 3 and 2 are sampled using their
+# tcportions shares and every remaining tc14 worker gets 1
+for days in (4, 3, 2):
+    options = workers[(workers['selected'] == 2) & (workers['tc'] == 0)]
+    it = int(targettc14 * tcportions[days - 1])
+    ilist = options.sample(it, random_state=seedvalue).index
+    workers.loc[workers.index.isin(ilist), 'tc'] = days
+
+workers.loc[(workers['selected'] == 2) & (workers['tc'] == 0), 'tc'] = 1
+
+# a tc14 worker is marked working = 1 with probability tc / 5
+workers['random'] = np.random.random(size=len(workers))
+workers.loc[(workers['selected'] == 2) & (workers['random'] < (workers['tc'] / 5)), 'working'] = 1
+workers['working'] = workers['working'].fillna(0)
+
+##########################################
+# format and save
+##########################################
+print('preparing and saving HH_WFH_STATUS.csv...')
+popsynhh = pd.read_csv(popsynhhpath, names=['sz', 'hhtype', 'vehicles',
+                                            'serialno', 'stpuma5', 'rowcol', 'adults', 'workers',
+                                            'children', 'iq', 'age', 'hhvtype', 'income'])
+workers.loc[workers['selected'] == 1, 'usualwfh'] = 1
+workers.loc[(workers['selected'] == 2) & (workers['working'] == 1), 'tc14'] = 1
+workers.loc[(workers['selected'] == 2) & (workers['working'] == 0), 'tc14nw'] = 1
+for col in ['usualwfh', 'tc14', 'tc14nw']:
+    workers[col] = workers[col].fillna(0)
+hhsummary = workers.groupby('household_id').agg({'usualwfh':'sum','tc14':'sum','tc14nw':'max'}).reset_index()
+hhsummary['wfhworkers'] = hhsummary['tc14'] + hhsummary['usualwfh']
+# finalflag: 1 if the household has any usualwfh worker, else 2 if it has a tc14 worker with working == 1
+hhsummary.loc[hhsummary['usualwfh'] > 0, 'finalflag'] = 1
+hhsummary.loc[(hhsummary['tc14'] > 0) & (hhsummary['usualwfh'] == 0), 'finalflag'] = 2
+final1 = dfhh[['household_id','SERIALNO']].merge(hhsummary[['household_id','finalflag','wfhworkers','tc14nw']],on='household_id', how='left')
+final1['finalflag'] = final1['finalflag'].fillna(0)
+try:
+    final1['SERIALNO'] = final1['SERIALNO'].str.replace('HU','99')
+except AttributeError:
+    # raised when SERIALNO is not a string column; nothing to replace then
+    pass
+final1sort = final1.sort_values('SERIALNO').reset_index()
+
+# line the households up with POPSYN_HH.csv: pair the two serial-number-sorted
+# tables row by row, then restore the POPSYN_HH row order
+pssort = popsynhh.sort_values('serialno').reset_index()
+final1sort['newindex'] = pssort['index']
+final1sort['sn2'] = pssort['serialno']
+final1sort.sort_values('newindex', inplace=True)
+final1sort.set_index('newindex', inplace=True)
+final1sort['SERIALNO'] = final1sort.SERIALNO.astype('int64')
+final1sort['diffcheck'] = final1sort['SERIALNO'] - final1sort['sn2']
+assert (final1sort.diffcheck == 0).all(), "file not aligned with popsyn_hh"
+for col in ['finalflag', 'wfhworkers', 'tc14nw']:
+    final1sort[col] = final1sort[col].fillna(0).astype('int')
+final1sort[['SERIALNO', 'finalflag','wfhworkers','tc14nw']].to_csv(savedir + "/HH_WFH_STATUS.CSV", index=False, header=False)
+
+##########################################
+# save additional files
+##########################################
+
+if savefiles == "Y":
+    print('saving additional files...')
+    workers.to_csv("workers.csv", index=False)
+    finaltargets1.to_csv("finaltargets_usualwfh.csv", index=False)
+    finaltargets2.to_csv("finaltargets_tc14.csv", index=False)
\ No newline at end of file
diff --git a/Database/trip_gen.bat b/Database/trip_gen.bat
index c571a05..2e52ebe 100644
--- a/Database/trip_gen.bat
+++ b/Database/trip_gen.bat
@@ -48,17 +48,43 @@
for /f "eol=# skip=5 tokens=2 delims=:" %%c in (batch_file.yaml) do (set wfh=%%c
 :break3
 for /f "eol=# skip=6 tokens=2 delims=:" %%d in (batch_file.yaml) do (set tc14=%%d & goto break4)
 :break4
+
+rem -- Read WFH rate group shares from Telework.yaml: file lines 3, 4, 5 = wfhpctlow, wfhpctmedium, wfhpcthigh --
+for /f "eol=# skip=2 tokens=2 delims=:" %%e in (Telework.yaml) do (set wfhLow=%%e & goto break5)
+:break5
+for /f "eol=# skip=3 tokens=2 delims=:" %%f in (Telework.yaml) do (set wfhMedium=%%f & goto break6)
+:break6
+for /f "eol=# skip=4 tokens=2 delims=:" %%g in (Telework.yaml) do (set wfhHigh=%%g & goto break7)
+:break7
+
 set sc=%sc:~1,3%
 set wfhFile=%wfhFile:~1%
 set wfh=%wfh:~1%
 set tc14=%tc14:~1%
+set wfhLow=%wfhLow:~1%
+set wfhMedium=%wfhMedium:~1%
+set wfhHigh=%wfhHigh:~1%
+
+for /f %%a in ('powershell -NoProfile -Command "(1000 + (200 - %sc%) * 1000 / 5000)/1000"') do set DeclineScaled=%%a
+for /f %%a in ('powershell -NoProfile -Command "%wfhLow% * %DeclineScaled%"') do set wfhLowSc=%%a
+for /f %%a in ('powershell -NoProfile -Command "%wfhMedium% * %DeclineScaled%"') do set wfhMediumSc=%%a
+for /f %%a in ('powershell -NoProfile -Command "%wfhHigh% * %DeclineScaled%"') do set wfhHighSc=%%a
+
+
 @echo.
 @echo ========================================
 @echo --- Model Run Settings ---
 @echo Scenario = %sc%
 @echo Create WFH validation file = %wfhFile%
-@echo Usual WFH share = %wfh%
-@echo WFH 1-4 days share = %tc14%
+@echo 2019 Usual WFH share = %wfh%
+@echo 2019 WFH 1-4 days share = %tc14%
+@echo 2025/26 WFH low rate group share= %wfhLow%
+@echo 2025/26 WFH medium rate group share= %wfhMedium%
+@echo 2025/26 WFH high rate group share= %wfhHigh%
+@echo Decline factor= %DeclineScaled%
+@echo Scenario WFH low rate group share= %wfhLowSc%
+@echo Scenario WFH medium rate group share= %wfhMediumSc%
+@echo Scenario WFH high rate group share= %wfhHighSc%
 @echo ========================================
 @echo.
 rem
@@ -159,8 +185,18 @@ cd wfhmodule
 set filedir="%cd%"
 echo.
 echo Starting the work-from-home allocation model ...
-echo wfh arguments: %filedir% %savedir% %wfhFile% %wfh% %tc14%
-python wfhflag.py %filedir% %savedir% %wfhFile% %wfh% %tc14%
+
+rem val==100 runs wfhflag_old.py with the 2019 shares (wfh, tc14); any other value
+rem runs wfhflag.py with the scenario-scaled low/medium/high rate group shares.
+rem Both sides of the comparison are quoted so an empty val is not a syntax error.
+if "%val%"=="100" (
+    echo Running wfhflag_old.py ...
+    python wfhflag_old.py %filedir% %savedir% %wfhFile% %wfh% %tc14%
+) else (
+    echo Running wfhflag.py ...
+    python wfhflag.py %filedir% %savedir% %wfhFile% %wfhLowSc% %wfhMediumSc% %wfhHighSc%
+)
+
 if %ERRORLEVEL% NEQ 0 (goto wfh_issue)
 
 cd ..