-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathget_all_filings_settings_between_dates.py
More file actions
136 lines (109 loc) · 3.98 KB
/
get_all_filings_settings_between_dates.py
File metadata and controls
136 lines (109 loc) · 3.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
"""
Script to get all filings and settings between two dates.
To use:
python get_all_filings_settings_between_dates.py mm-dd-yyyy mm-dd-yyyy [--county COUNTY]
"""
from datetime import date, timedelta
import logging
import sys
from typing import List, Tuple
import click
from colorama import Fore, Style
from emailing import send_email
import parse_filings
import parse_settings
import scrapers
# Send log output to stdout; basicConfig only installs its handler when the
# root logger does not have one yet.
logging.basicConfig(stream=sys.stdout)

# Module-wide handle on the root logger, emitting records at INFO and above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
def split_into_weeks(start: date, end: date) -> List[Tuple[date, date]]:
    """
    Split the inclusive range [start, end] into consecutive chunks of at most 7 days.

    Every chunk except the last spans exactly 7 days (its end is its start plus
    6 days); the last chunk ends on `end`. When the inclusive range covers 7 days
    or fewer (which includes `end` being earlier than `start`), the result is the
    single pair `(start, end)`, returned as given.

    The chunks are built with a loop rather than recursion, so very long ranges
    do not run into Python's recursion limit.
    """
    weeks: List[Tuple[date, date]] = []
    chunk_start = start
    # Peel off a full 7-day chunk only while more than 7 days (inclusive) remain.
    while (end - chunk_start).days + 1 > 7:
        chunk_end = chunk_start + timedelta(days=6)
        weeks.append((chunk_start, chunk_end))
        chunk_start = chunk_end + timedelta(days=1)
    # Whatever remains (7 days or fewer) forms the final chunk.
    weeks.append((chunk_start, end))
    return weeks
def try_to_parse(
    start: str, end: str, tries: int, scraper: scrapers.FakeScraper
) -> str:
    """
    Parses filings and settings between start and end dates.

    Makes up to `tries` attempts. Each attempt runs the filings parse followed by
    the settings parse and counts as failed if either one raises.

    `start` and `end` are forwarded unchanged as `afterdate` / `beforedate` and are
    interpolated into the log messages and the failure string.

    Returns 'success' as soon as one attempt completes. If every attempt fails
    (or `tries` is less than 1, so no attempt is made at all), returns the string
    f"{start}, {end}" identifying the range that could not be parsed.
    """
    for attempt in range(1, tries + 1):
        try:
            parse_filings.parse_filings_on_cloud(
                afterdate=start, beforedate=end, get_old_active=False, scraper=scraper
            )
            parse_settings.parse_settings_on_cloud(
                afterdate=start, beforedate=end, write_to_sheets=False, scraper=scraper
            )
        except Exception as error:
            if attempt == tries:
                logger.error(f"Error message: {error}")
            else:
                # Leave a trace of intermediate failures instead of silently retrying.
                logger.warning(
                    f"Attempt {attempt} of {tries} to parse filings and settings "
                    + f"between {start} and {end} failed: {error}"
                )
        else:
            logger.info(
                Fore.GREEN
                + "Successfully parsed filings and settings "
                + f"between {start} and {end} on attempt {attempt}.\n"
                + Style.RESET_ALL
            )
            return "success"

    # Reached only when no attempt succeeded, including the case where `tries` < 1
    # and nothing was attempted; always hand the caller a string, never None.
    logger.error(
        Fore.RED
        + f"Failed to parse filings and settings between {start} "
        + f"and {end} on all {tries} attempts.\n"
        + Style.RESET_ALL
    )
    return f"{start}, {end}"
def get_all_filings_settings_between_dates(
    start_date: date, end_date: date, county: str, showbrowser: bool = True
) -> List[str]:
    """
    Gets all filings and settings between `start_date` and `end_date` but splits it up by week.

    The scraper class is looked up in `scrapers.SCRAPER_NAMES` by `county` and
    instantiated once with `headless=not showbrowser`; that single instance is
    reused for every week. Each week is attempted up to 5 times.

    Logs the weeks that failed and, if there are any, emails the list of failed
    date ranges. Returns the failure strings (an empty list when every week
    succeeded).

    NOTE(review): the default used to be written `showbrowser=bool`, i.e. the
    `bool` type itself was the default value. That object is truthy, so `True`
    keeps the existing behaviour for callers that omit the argument - confirm
    whether headless was actually the intended default.
    """
    weeks = split_into_weeks(start_date, end_date)
    logger.info(
        f"Will get all filings and settings between {start_date} and {end_date}\n"
    )

    scraper = scrapers.SCRAPER_NAMES[county](headless=not showbrowser)

    failures: List[str] = []
    for week_start, week_end in weeks:
        msg = try_to_parse(week_start, week_end, 5, scraper=scraper)
        # Anything other than the 'success' sentinel is the failed date range.
        if msg != "success":
            failures.append(msg)

    if failures:
        failures_str = "\n".join(failures)
        logger.info("All failures:")
        logger.info(Fore.RED + failures_str + Style.RESET_ALL)
        send_email(
            failures_str, "Date ranges for which parsing filings and settings failed"
        )
    else:
        logger.info(
            Fore.GREEN
            + "There were no failures when getting all filings "
            + f"between {start_date} and {end_date} - yay!!"
            + Style.RESET_ALL
        )

    # Match the declared return type so callers can inspect what failed.
    return failures
if __name__ == "__main__":
    # Both positional dates accept the same spellings:
    # yyyy-mm-dd, (m)m-(d)d-yyyy or (m)m/(d)d/yyyy.
    accepted_date_formats = ["%Y-%m-%d", "%m-%d-%Y", "%m/%d/%Y"]

    # No docstring on the command on purpose: click would surface it as --help text.
    @click.command()
    @click.argument("start_date", type=click.DateTime(formats=accepted_date_formats))
    @click.argument("end_date", type=click.DateTime(formats=accepted_date_formats))
    @click.option(
        "--county",
        type=click.Choice(scrapers.SCRAPER_NAMES, case_sensitive=False),
        default="travis",
    )
    def get_all_between_dates(start_date, end_date, county):
        # Thin CLI wrapper: hand the parsed arguments straight to the worker function.
        get_all_filings_settings_between_dates(start_date, end_date, county)

    get_all_between_dates()