Skip to content

Commit 01dfa8e

Browse files
pmeier and seemethere authored
add custom user agent for download_url (#3499)
* add custom user agent for download_url
* fix progress bar
* lint
* [test] use repo instead of nightly for download tests
* .circleci: Be specific about where pytorch is coming from
  Signed-off-by: Eli Uriegas <[email protected]>
* torchvision: Add more info to user-agent
  Signed-off-by: Eli Uriegas <[email protected]>
* .circleci: Increase timeout for conda packages
  The conda resolver is extremely slow so let's just give it more time to idly sit by and resolve dependencies
  Signed-off-by: Eli Uriegas <[email protected]>

Co-authored-by: Eli Uriegas <[email protected]>
1 parent 506279c commit 01dfa8e

File tree

7 files changed

+48
-23
lines changed

7 files changed

+48
-23
lines changed

.circleci/config.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,11 @@ jobs:
194194
steps:
195195
- checkout_merge
196196
- designate_upload_channel
197-
- run: packaging/build_conda.sh
197+
- run:
198+
name: Build conda packages
199+
no_output_timeout: 20m
200+
command: |
201+
packaging/build_conda.sh
198202
- store_artifacts:
199203
path: /opt/conda/conda-bld/linux-64
200204
- persist_to_workspace:
@@ -593,7 +597,7 @@ jobs:
593597

594598
keys:
595599
- env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}
596-
600+
597601
- run:
598602
name: Setup
599603
command: .circleci/unittest/windows/scripts/setup_env.sh

.circleci/config.yml.in

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,11 @@ jobs:
194194
steps:
195195
- checkout_merge
196196
- designate_upload_channel
197-
- run: packaging/build_conda.sh
197+
- run:
198+
name: Build conda packages
199+
no_output_timeout: 20m
200+
command: |
201+
packaging/build_conda.sh
198202
- store_artifacts:
199203
path: /opt/conda/conda-bld/linux-64
200204
- persist_to_workspace:
@@ -593,7 +597,7 @@ jobs:
593597
{% raw %}
594598
keys:
595599
- env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }}
596-
{% endraw %}
600+
{% endraw %}
597601
- run:
598602
name: Setup
599603
command: .circleci/unittest/windows/scripts/setup_env.sh

.circleci/unittest/linux/scripts/install.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ else
2424
fi
2525

2626
printf "Installing PyTorch with %s\n" "${cudatoolkit}"
27-
conda install -y -c "pytorch-${UPLOAD_CHANNEL}" -c conda-forge pytorch "${cudatoolkit}"
27+
conda install -y -c "pytorch-${UPLOAD_CHANNEL}" -c conda-forge "pytorch-${UPLOAD_CHANNEL}::pytorch" "${cudatoolkit}"
2828

2929
printf "* Installing torchvision\n"
3030
python setup.py develop

.circleci/unittest/windows/scripts/install.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ else
2626
fi
2727

2828
printf "Installing PyTorch with %s\n" "${cudatoolkit}"
29-
conda install -y -c "pytorch-${UPLOAD_CHANNEL}" -c conda-forge pytorch "${cudatoolkit}"
29+
conda install -y -c "pytorch-${UPLOAD_CHANNEL}" -c conda-forge "pytorch-${UPLOAD_CHANNEL}::pytorch" "${cudatoolkit}"
3030

3131
printf "* Installing torchvision\n"
3232
"$this_dir/vc_env_helper.bat" python setup.py develop

.github/workflows/tests-schedule.yml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,11 @@ jobs:
2626
- name: Checkout repository
2727
uses: actions/checkout@v2
2828

29-
- name: Install PyTorch from the nightlies
30-
run: |
31-
pip install numpy
32-
pip install --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
29+
- name: Install torch nightly build
30+
run: pip install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
31+
32+
- name: Install torchvision
33+
run: pip install -e .
3334

3435
- name: Install all optional dataset requirements
3536
run: pip install scipy pandas pycocotools lmdb requests

test/test_datasets_download.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import pytest
1515

1616
from torchvision import datasets
17-
from torchvision.datasets.utils import download_url, check_integrity, download_file_from_google_drive
17+
from torchvision.datasets.utils import download_url, check_integrity, download_file_from_google_drive, USER_AGENT
1818

1919
from common_utils import get_tmp_dir
2020
from fakedata_generation import places365_root
@@ -150,7 +150,7 @@ def assert_server_response_ok():
150150

151151

152152
def assert_url_is_accessible(url, timeout=5.0):
153-
request = Request(url, headers=dict(method="HEAD"))
153+
request = Request(url, headers={"method": "HEAD", "User-Agent": USER_AGENT})
154154
with assert_server_response_ok():
155155
urlopen(request, timeout=timeout)
156156

@@ -160,7 +160,8 @@ def assert_file_downloads_correctly(url, md5, timeout=5.0):
160160
file = path.join(root, path.basename(url))
161161
with assert_server_response_ok():
162162
with open(file, "wb") as fh:
163-
response = urlopen(url, timeout=timeout)
163+
request = Request(url, headers={"User-Agent": USER_AGENT})
164+
response = urlopen(request, timeout=timeout)
164165
fh.write(response.read())
165166

166167
assert check_integrity(file, md5=md5), "The MD5 checksums mismatch"

torchvision/datasets/utils.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,32 @@
77
from typing import Any, Callable, List, Iterable, Optional, TypeVar
88
from urllib.parse import urlparse
99
import zipfile
10+
import urllib
11+
import urllib.request
12+
import urllib.error
1013

1114
import torch
1215
from torch.utils.model_zoo import tqdm
16+
try:
17+
from ..version import __version__ as __vision_version__ # noqa: F401
18+
except ImportError:
19+
__vision_version__ = "undefined"
20+
21+
USER_AGENT = os.environ.get(
22+
"TORCHVISION_USER_AGENT",
23+
f"pytorch-{torch.__version__}/vision-{__vision_version__}"
24+
)
25+
26+
27+
def _urlretrieve(url: str, filename: str, chunk_size: int = 1024) -> None:
    """Download ``url`` to ``filename`` in chunks while showing a progress bar.

    The request is sent with the module-level ``USER_AGENT`` as its
    ``User-Agent`` header.

    Args:
        url: URL to fetch.
        filename: Path of the local file to write. It is opened in ``"wb"``
            mode, so any existing content is truncated.
        chunk_size: Maximum number of bytes requested per read.
    """
    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    with open(filename, "wb") as fh:
        with urllib.request.urlopen(request) as response:
            with tqdm(total=response.length) as pbar:
                # ``read`` returns bytes, so the end-of-stream sentinel has to
                # be b"" (a str "" sentinel never compares equal to bytes).
                for chunk in iter(lambda: response.read(chunk_size), b""):
                    fh.write(chunk)
                    # Advance by the bytes actually received: a read may
                    # return fewer than ``chunk_size`` bytes, and the last
                    # chunk usually does.
                    pbar.update(len(chunk))
1336

1437

1538
def gen_bar_updater() -> Callable[[int, int, int], None]:
@@ -83,8 +106,6 @@ def download_url(
83106
md5 (str, optional): MD5 checksum of the download. If None, do not check
84107
max_redirect_hops (int, optional): Maximum number of redirect hops allowed
85108
"""
86-
import urllib
87-
88109
root = os.path.expanduser(root)
89110
if not filename:
90111
filename = os.path.basename(url)
@@ -108,19 +129,13 @@ def download_url(
108129
# download the file
109130
try:
110131
print('Downloading ' + url + ' to ' + fpath)
111-
urllib.request.urlretrieve(
112-
url, fpath,
113-
reporthook=gen_bar_updater()
114-
)
132+
_urlretrieve(url, fpath)
115133
except (urllib.error.URLError, IOError) as e: # type: ignore[attr-defined]
116134
if url[:5] == 'https':
117135
url = url.replace('https:', 'http:')
118136
print('Failed download. Trying https -> http instead.'
119137
' Downloading ' + url + ' to ' + fpath)
120-
urllib.request.urlretrieve(
121-
url, fpath,
122-
reporthook=gen_bar_updater()
123-
)
138+
_urlretrieve(url, fpath)
124139
else:
125140
raise e
126141
# check integrity of downloaded file

0 commit comments

Comments
 (0)