Skip to content

Commit 88d66a1

Browse files
committed
better exercise script
1 parent 30a473e commit 88d66a1

File tree

1 file changed

+121
-109
lines changed

1 file changed

+121
-109
lines changed

exercise.py

Lines changed: 121 additions & 109 deletions
Original file line number | Diff line number | Diff line change
@@ -23,118 +23,130 @@ def _exercise_path(dirname) -> Path:
2323
return Path(os.path.dirname(__file__)).joinpath(dirname)
2424

2525

26-
# Download compressed archive of full-scale data, if not already present
27-
# If the full scale data is already available, the whole data download process
28-
# can be commented out and skipped.
26+
GET_FULL_DATA = True
2927

3028
full_data_dir = _exercise_path("data-full")
3129

32-
archive_sha256 = {
33-
11: "5b0c7ad009115830fbedaee9dd33981b3bab23b3b7177a7a0a8f3c871decf989",
34-
10: "c35b7c7f83be21159b20da8185cb5bd78b812378b424c20bdc1f5df51b283921",
35-
4: "13f04db524324b48b9244e872cc997c97a0a920548ce5d5ef3fe4af7f09e9517",
36-
3: "a6751f10aee9deec531582862368519f70f6d6f03f344bf82397d099d36537e1",
37-
2: "ee1cdab914dcba7a0feb01b65822c6160be3f60aba29f797259c7dd0a2a40d3a",
38-
5: "1663dfaf6eda027850f7c79a783d646bdc58a6a588bc1308fc31ea9cb3f85f2d",
39-
12: "35b664eedaece82b9ae0167664e618a7daa4079ca99ebf8cc01dd61d7ff4a51b",
40-
15: "776fed4a3bd01e98caa4aea4f36c99465af252d5ae1a66d66897496271805e35",
41-
14: "0cce0a90ac662e2d40d89abd28640dd5457e74f0b1a4eabd5f53f747955fe335",
42-
13: "767cc029fe36e0a67446cf822c4e34525053a0857228efb2f50c17785e88a7a1",
43-
9: "6e49b3738acfd0778becaf984f570906cfbc600e14f2de0d03c627f95c61afde",
44-
0: "adeae9915a0402b87937ba19e6732396d29813cec64bb1b1d2c66e336ca349a4",
45-
7: "3df78f56eb383c4adba4e32ccf78700151e71a8271f3f525317574cdcb61adbb",
46-
6: "a204166e2875368314cae7070fcd77591603c75565986dfb4b171b1e08400c4f",
47-
1: "4322559d96c7c1521760f875da7aeb92b9dae7a19824717f5c1ae086623f15a0",
48-
8: "7dfb447beaf4e5fc2f9656c7e6427a1149f3a0f3c5f6f7c285b508a279d7eab5",
49-
}
50-
51-
full_data_sha256 = {
52-
"persons.parquet": "f41434b49d87aa9bb19296c5ae271c25a07356e3e370659a6230017c82c881b7",
53-
"transit_skims_PM.omx": "20d9af6f6be2f78ce81f817aca01eb05611a7a1702e9de896db8a918af11421f",
54-
"maz_maz_walk.parquet": "8759bdae920e6f507120e68eab3ead2e7738e240c22512469bc734cc95bb7c59",
55-
"traffic_skims_MD.omx": "5cdd041d4324f7898b17b22555af65201ed323cebaf7ba34a0351df981db733c",
56-
"households.parquet": "c75156d739ae71b01e0d3be7563b04a115b987bd8e8587173d7957aab58f4a89",
57-
"transit_skims_MD.omx": "535309745b79ad8a71601228b4bb6824e2996632fe177fe9e8cd7b56693eef4d",
58-
"traffic_skims_PM.omx": "434996674399cdfd1073c4d24cd8b3a5691c541f97386a8d29c4833b9ce85c7f",
59-
"traffic_skims_EA.omx": "fe097d769c373bd37ab24f57ff102c70213055aae73be9a5a9c3d5d762bf2f0e",
60-
"traffic_skims_EV.omx": "91c762df3288867a050395691cf1f13b9850f6e8ea55163730f8fceb4f8fca98",
61-
"transit_skims_AM.omx": "7fc26ce47bfc4c6844a6fcd193d4808dda711bf2a8e02a788b33ba21d000b88f",
62-
"maz_stop_walk.parquet": "f94fe8690db2342546be3592da4f20088bce7b026749f626a66ce2126555946e",
63-
"maz_maz_bike.parquet": "9ad9f5108b5dd88d893bc2cb56354400fe29749310d39919bc9e88e9b5ddb036",
64-
"land_use.parquet": "a2b41246fbfed8250e9fcda0853da1bd33a05cf5d0699f965a93759e39c8071b",
65-
"transit_skims_EV.omx": "6fcec702b5d4ebc01e88b5dec075fcc9b7ee6c26d32a751dc3b99536e80a336d",
66-
"transit_skims_EA.omx": "96111a202d4d2630fd2e749f9a3f96dbc6f314d4a3abb4d5cb92f0ff1337d6d0",
67-
"traffic_skims_AM.omx": "e31e1005897eaf30e3415b12c93696d88fbc42ee6c9b75f35178ad196f0eb80f",
68-
}
69-
70-
download_required = False
71-
72-
for filename, sha256 in full_data_sha256.items():
73-
f = full_data_dir.joinpath(filename)
74-
print("checking", f)
75-
if not f.exists() or sha256 != sha256_checksum(f):
76-
download_required = True
77-
break
78-
79-
if download_required:
80-
print("downloading full data...")
81-
download_external_example(
82-
_exercise_path("."),
83-
name="sandag-abm3",
84-
assets={
85-
f"data-full.tar.zst.part{i:03}": {
86-
"url": f"https://github.com/ActivitySim/sandag-abm3-example/releases/download/v0.2.0/sandag-full-data.tar.zst.part{i:03}",
87-
"sha256": sha256,
88-
}
89-
for i, sha256 in archive_sha256.items()
90-
},
91-
)
92-
untarzst(
93-
_exercise_path("sandag-abm3/data-full.tar.zst.part000"),
94-
full_data_dir,
95-
)
96-
# recheck sha256
30+
def get_full_data():
31+
"""
32+
Download the full-scale data archive and extract it.
33+
34+
This function downloads the full-scale data archive, if not already present,
35+
and verifies that the data is correct by checking the SHA-256 checksum of each
36+
file. If the full-scale data directory already exists, the whole data download
37+
process can be skipped by setting the global variable GET_FULL_DATA to False.
38+
"""
39+
archive_sha256 = {
40+
11: "5b0c7ad009115830fbedaee9dd33981b3bab23b3b7177a7a0a8f3c871decf989",
41+
10: "c35b7c7f83be21159b20da8185cb5bd78b812378b424c20bdc1f5df51b283921",
42+
4: "13f04db524324b48b9244e872cc997c97a0a920548ce5d5ef3fe4af7f09e9517",
43+
3: "a6751f10aee9deec531582862368519f70f6d6f03f344bf82397d099d36537e1",
44+
2: "ee1cdab914dcba7a0feb01b65822c6160be3f60aba29f797259c7dd0a2a40d3a",
45+
5: "1663dfaf6eda027850f7c79a783d646bdc58a6a588bc1308fc31ea9cb3f85f2d",
46+
12: "35b664eedaece82b9ae0167664e618a7daa4079ca99ebf8cc01dd61d7ff4a51b",
47+
15: "776fed4a3bd01e98caa4aea4f36c99465af252d5ae1a66d66897496271805e35",
48+
14: "0cce0a90ac662e2d40d89abd28640dd5457e74f0b1a4eabd5f53f747955fe335",
49+
13: "767cc029fe36e0a67446cf822c4e34525053a0857228efb2f50c17785e88a7a1",
50+
9: "6e49b3738acfd0778becaf984f570906cfbc600e14f2de0d03c627f95c61afde",
51+
0: "adeae9915a0402b87937ba19e6732396d29813cec64bb1b1d2c66e336ca349a4",
52+
7: "3df78f56eb383c4adba4e32ccf78700151e71a8271f3f525317574cdcb61adbb",
53+
6: "a204166e2875368314cae7070fcd77591603c75565986dfb4b171b1e08400c4f",
54+
1: "4322559d96c7c1521760f875da7aeb92b9dae7a19824717f5c1ae086623f15a0",
55+
8: "7dfb447beaf4e5fc2f9656c7e6427a1149f3a0f3c5f6f7c285b508a279d7eab5",
56+
}
57+
58+
full_data_sha256 = {
59+
"persons.parquet": "f41434b49d87aa9bb19296c5ae271c25a07356e3e370659a6230017c82c881b7",
60+
"transit_skims_PM.omx": "20d9af6f6be2f78ce81f817aca01eb05611a7a1702e9de896db8a918af11421f",
61+
"maz_maz_walk.parquet": "8759bdae920e6f507120e68eab3ead2e7738e240c22512469bc734cc95bb7c59",
62+
"traffic_skims_MD.omx": "5cdd041d4324f7898b17b22555af65201ed323cebaf7ba34a0351df981db733c",
63+
"households.parquet": "c75156d739ae71b01e0d3be7563b04a115b987bd8e8587173d7957aab58f4a89",
64+
"transit_skims_MD.omx": "535309745b79ad8a71601228b4bb6824e2996632fe177fe9e8cd7b56693eef4d",
65+
"traffic_skims_PM.omx": "434996674399cdfd1073c4d24cd8b3a5691c541f97386a8d29c4833b9ce85c7f",
66+
"traffic_skims_EA.omx": "fe097d769c373bd37ab24f57ff102c70213055aae73be9a5a9c3d5d762bf2f0e",
67+
"traffic_skims_EV.omx": "91c762df3288867a050395691cf1f13b9850f6e8ea55163730f8fceb4f8fca98",
68+
"transit_skims_AM.omx": "7fc26ce47bfc4c6844a6fcd193d4808dda711bf2a8e02a788b33ba21d000b88f",
69+
"maz_stop_walk.parquet": "f94fe8690db2342546be3592da4f20088bce7b026749f626a66ce2126555946e",
70+
"maz_maz_bike.parquet": "9ad9f5108b5dd88d893bc2cb56354400fe29749310d39919bc9e88e9b5ddb036",
71+
"land_use.parquet": "a2b41246fbfed8250e9fcda0853da1bd33a05cf5d0699f965a93759e39c8071b",
72+
"transit_skims_EV.omx": "6fcec702b5d4ebc01e88b5dec075fcc9b7ee6c26d32a751dc3b99536e80a336d",
73+
"transit_skims_EA.omx": "96111a202d4d2630fd2e749f9a3f96dbc6f314d4a3abb4d5cb92f0ff1337d6d0",
74+
"traffic_skims_AM.omx": "e31e1005897eaf30e3415b12c93696d88fbc42ee6c9b75f35178ad196f0eb80f",
75+
}
76+
77+
download_required = False
78+
9779
for filename, sha256 in full_data_sha256.items():
98-
if not full_data_dir.joinpath(filename).exists():
99-
raise ValueError(f"data missing: {filename}")
100-
if sha256 != sha256_checksum(full_data_dir.joinpath(filename)):
101-
raise ValueError(f"data error: {filename}")
102-
else:
103-
print("full data ready")
104-
105-
###########################
106-
### MODEL EXERCISE ###
107-
###########################
108-
109-
out_dir = _exercise_path("exercise-output-25k")
110-
out_dir.mkdir(exist_ok=True)
111-
out_dir.joinpath(".gitignore").write_text("**\n")
112-
113-
settings = dict(
114-
cleanup_pipeline_after_run=False,
115-
treat_warnings_as_errors=False,
116-
households_sample_size=100_000,
117-
chunk_size=0,
118-
use_shadow_pricing=True,
119-
sharrow="require",
120-
recode_pipeline_columns=True,
121-
memory_profile=True,
122-
)
123-
124-
state = workflow.State.make_default(
125-
configs_dir=(
126-
_exercise_path(r"configs/common"),
127-
_exercise_path(r"configs/resident"),
128-
),
129-
data_dir=_exercise_path("data-full"),
130-
output_dir=out_dir,
131-
settings=settings,
132-
)
133-
state.import_extensions("../extensions")
134-
state.filesystem.persist_sharrow_cache()
135-
assert state.settings.memory_profile == True
136-
137-
assert state.settings.chunk_size == 0
80+
f = full_data_dir.joinpath(filename)
81+
print("checking", f)
82+
if not f.exists() or sha256 != sha256_checksum(f):
83+
download_required = True
84+
break
85+
86+
if download_required:
87+
print("downloading full data...")
88+
download_external_example(
89+
_exercise_path("."),
90+
name="sandag-abm3",
91+
assets={
92+
f"data-full.tar.zst.part{i:03}": {
93+
"url": f"https://github.com/ActivitySim/sandag-abm3-example/releases/download/v0.2.0/sandag-full-data.tar.zst.part{i:03}",
94+
"sha256": sha256,
95+
}
96+
for i, sha256 in archive_sha256.items()
97+
},
98+
)
99+
untarzst(
100+
_exercise_path("sandag-abm3/data-full.tar.zst.part000"),
101+
full_data_dir,
102+
)
103+
# Re-verify every extracted file against its expected SHA-256 checksum
104+
for filename, sha256 in full_data_sha256.items():
105+
if not full_data_dir.joinpath(filename).exists():
106+
raise ValueError(f"data missing: {filename}")
107+
if sha256 != sha256_checksum(full_data_dir.joinpath(filename)):
108+
raise ValueError(f"data error: {filename}")
109+
else:
110+
print("full data ready")
111+
112+
113+
def main(**settings):
114+
"""
115+
Run the full-scale model exercise with the given settings overrides and return the resulting state.
116+
"""
117+
out_dir = _exercise_path("exercise-output")
118+
out_dir.mkdir(exist_ok=True)
119+
out_dir.joinpath(".gitignore").write_text("**\n")
120+
121+
state = workflow.State.make_default(
122+
configs_dir=(
123+
_exercise_path(r"configs/common"),
124+
_exercise_path(r"configs/resident"),
125+
),
126+
data_dir=_exercise_path("data-full"),
127+
output_dir=out_dir,
128+
settings=settings,
129+
)
130+
state.import_extensions("../extensions")
131+
state.filesystem.persist_sharrow_cache()
132+
state.run.all()
133+
return state
134+
138135

139136
if __name__ == "__main__":
140-
state.run.all()
137+
138+
if GET_FULL_DATA or not full_data_dir.exists():
139+
get_full_data()
140+
141+
# Modify the settings values here to override the defaults
142+
# defined in the various config files.
143+
state = main(
144+
cleanup_pipeline_after_run=False,
145+
treat_warnings_as_errors=False,
146+
households_sample_size=100_000,
147+
chunk_size=0,
148+
use_shadow_pricing=True,
149+
sharrow="require",
150+
recode_pipeline_columns=True,
151+
memory_profile=True,
152+
)

0 commit comments

Comments
 (0)