|
11 | 11 | import h5py |
12 | 12 | import numpy as np |
13 | 13 | import pandas as pd |
| 14 | +import holidays |
14 | 15 | from skimage.transform import resize |
15 | 16 |
|
16 | 17 | from accounts import account_entries, account_params, json_entries |
@@ -40,6 +41,28 @@ def parse_args(args=None): |
40 | 41 | 'directory.') |
41 | 42 |
|
42 | 43 |
|
def holiday():
    """Create the US holidays parquet data under ``data_dir`` if missing.

    The output is written to ``<data_dir>/holidays`` and has two columns:
    ``Date`` (cast to ``datetime64[ns]``) and ``holiday`` (the value the
    ``holidays`` package associates with each date).  Years 1990 through
    1999 are covered.  When the target path already exists the function
    returns without doing any work, so it is safe to call repeatedly.
    """
    target = os.path.join(data_dir, "holidays")

    # Only build the data on the first run; an existing path means a
    # previous invocation already produced it.
    if not os.path.exists(target):
        decade = list(range(1990, 2000))
        calendar = holidays.US(years=decade)

        frame = pd.DataFrame(
            {
                "Date": calendar.keys(),
                "holiday": calendar.values(),
            }
        )
        # The keys are plain date objects; store them as pandas timestamps.
        frame["Date"] = frame["Date"].astype("datetime64[ns]")

        frame.to_parquet(target)
        print("Created holidays data.")
| 64 | + |
| 65 | + |
43 | 66 | def flights(small=None): |
44 | 67 | start = time.time() |
45 | 68 | flights_raw = os.path.join(data_dir, 'nycflights.tar.gz') |
@@ -224,6 +247,7 @@ def main(args=None): |
224 | 247 | accounts_json(args.small) |
225 | 248 | if args.dataset == "flights" or args.dataset == "all": |
226 | 249 | flights(args.small) |
| 250 | + holiday() |
227 | 251 |
|
228 | 252 |
|
229 | 253 | if __name__ == '__main__': |
|
0 commit comments