Skip to content

Commit 9cb568f

Browse files
DRIVERS-3032 Make MongoDB downloads more robust (#613)
Co-authored-by: Ezra Chung <[email protected]>
1 parent 17be5e8 commit 9cb568f

File tree

2 files changed

+32
-23
lines changed

2 files changed

+32
-23
lines changed

.evergreen/mongodl.py

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -609,30 +609,35 @@ def download_file(self, url: str) -> DownloadResult:
609609
if modtime:
610610
headers["If-Modified-Since"] = modtime
611611
digest = hashlib.sha256(url.encode("utf-8")).hexdigest()[:4]
612-
dest = self._dirpath / "files" / digest / PurePosixPath(url).name
612+
file_name = PurePosixPath(url).name
613+
dest = self._dirpath / "files" / digest / file_name
613614
if not dest.exists():
614615
headers = {}
615616
req = urllib.request.Request(url, headers=headers)
616617

617618
try:
618-
resp = urllib.request.urlopen(req, context=SSL_CONTEXT)
619+
resp = urllib.request.urlopen(req, context=SSL_CONTEXT, timeout=30)
619620
except urllib.error.HTTPError as e:
620621
if e.code != 304:
621622
raise RuntimeError(f"Failed to download [{url}]") from e
622623
assert dest.is_file(), (
623624
"The download cache is missing an expected file",
624625
dest,
625626
)
627+
LOGGER.info("Using cached file %s", file_name)
626628
return DownloadResult(False, dest)
627629

628630
_mkdir(dest.parent)
629631
got_etag = resp.getheader("ETag")
630632
got_modtime = resp.getheader("Last-Modified")
633+
got_len = int(resp.getheader("Content-Length"))
631634
with dest.open("wb") as of:
632-
buf = resp.read(1024 * 1024 * 4)
633-
while buf:
634-
of.write(buf)
635-
buf = resp.read(1024 * 1024 * 4)
635+
shutil.copyfileobj(resp, of, length=got_len)
636+
file_size = dest.stat().st_size
637+
if file_size != got_len:
638+
raise RuntimeError(
639+
f"File size: {file_size} does not match download size: {got_len}"
640+
)
636641
self._db(
637642
"INSERT OR REPLACE INTO mdl_http_downloads (url, etag, last_modified) "
638643
"VALUES (:url, :etag, :mtime)",
@@ -895,7 +900,8 @@ def _dl_component(
895900
return _expand_archive(
896901
cached, out_dir, pattern, strip_components, test=test
897902
)
898-
except Exception:
903+
except Exception as e:
904+
LOGGER.exception(e)
899905
if not retrier.retry():
900906
raise
901907

.evergreen/mongosh_dl.py

Lines changed: 19 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,11 @@
2525
from mongodl import LOGGER as DL_LOGGER
2626
from mongodl import (
2727
SSL_CONTEXT,
28+
Cache,
2829
DownloadRetrier,
2930
ExpandResult,
3031
_expand_archive,
32+
default_cache_dir,
3133
infer_arch,
3234
)
3335

@@ -40,7 +42,7 @@ def _get_latest_version():
4042
url = "https://api.github.com/repos/mongodb-js/mongosh/releases"
4143
req = urllib.request.Request(url, headers=headers)
4244
try:
43-
resp = urllib.request.urlopen(req, context=SSL_CONTEXT)
45+
resp = urllib.request.urlopen(req, context=SSL_CONTEXT, timeout=30)
4446
except Exception:
4547
return _get_latest_version_git()
4648

@@ -73,6 +75,7 @@ def _get_latest_version_git():
7375

7476

7577
def _download(
78+
cache: Cache,
7679
out_dir: Path,
7780
version: str,
7881
target: str,
@@ -108,24 +111,15 @@ def _download(
108111
if no_download:
109112
return ExpandResult.Okay
110113

111-
req = urllib.request.Request(dl_url)
112114
retrier = DownloadRetrier(retries)
113115
while True:
114116
try:
115-
resp = urllib.request.urlopen(req)
116-
with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as fp:
117-
four_mebibytes = 1024 * 1024 * 4
118-
buf = resp.read(four_mebibytes)
119-
while buf:
120-
fp.write(buf)
121-
buf = resp.read(four_mebibytes)
122-
fp.close()
123-
resp = _expand_archive(
124-
Path(fp.name), out_dir, pattern, strip_components, test=test
125-
)
126-
os.remove(fp.name)
127-
return resp
128-
except Exception:
117+
cached = cache.download_file(dl_url).path
118+
return _expand_archive(
119+
cached, out_dir, pattern, strip_components, test=test
120+
)
121+
except Exception as e:
122+
LOGGER.exception(e)
129123
if not retrier.retry():
130124
raise
131125

@@ -140,6 +134,12 @@ def main(argv=None):
140134
parser.add_argument(
141135
"--quiet", "-q", action="store_true", help="Whether to log at the WARNING level"
142136
)
137+
parser.add_argument(
138+
"--cache-dir",
139+
type=Path,
140+
default=default_cache_dir(),
141+
help="Directory where download caches and metadata will be stored",
142+
)
143143
dl_grp = parser.add_argument_group(
144144
"Download arguments",
145145
description="Select what to download and extract. "
@@ -220,7 +220,10 @@ def main(argv=None):
220220
elif args.quiet:
221221
LOGGER.setLevel(logging.WARNING)
222222
DL_LOGGER.setLevel(logging.WARNING)
223+
224+
cache = Cache.open_in(args.cache_dir)
223225
result = _download(
226+
cache,
224227
out,
225228
version=args.version,
226229
target=target,

0 commit comments

Comments
 (0)