Skip to content

Commit 3f31244

Browse files
committed
Force an update if the cached sheet is more than one day old
Signed-off-by: John Seekins <[email protected]>
1 parent 14ab1fa commit 3f31244

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

scraper.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,13 +38,21 @@ def _download_sheet(self) -> None:
3838
links = soup.findAll("a", href=re.compile("^https://www.ice.gov/doclib.*xlsx"))
3939
# quick solution is first result
4040
self.sheet_url = links[0]["href"]
41-
if not os.path.isfile(self.filename) or os.path.getsize(self.filename) < 1:
41+
now = time.time()
42+
# one day in seconds is 86400
43+
if (
44+
not os.path.isfile(self.filename)
45+
or os.path.getsize(self.filename) < 1
46+
or now - os.path.getmtime(self.filename) > 86400
47+
):
4248
logger.info("Downloading detention stats sheet from %s", self.sheet_url)
4349
resp = session.get(self.sheet_url, timeout=120)
4450
with open(self.filename, "wb") as f:
4551
for chunk in resp.iter_content(chunk_size=1024):
4652
if chunk:
4753
f.write(chunk)
54+
else:
55+
logger.info("Using cached detention stats sheet: %s", self.filename)
4856

4957
def _clean_street(self, street: str, locality: str = "") -> Tuple[str, bool]:
5058
"""Generally, we'll let the spreadsheet win arguments just to be consistent"""

0 commit comments

Comments
 (0)