Skip to content

Commit e61de2f

Browse files
committed
Use GET range request for checking video existence, not OPTIONS
1 parent 9e2c277 commit e61de2f

File tree

3 files changed

+16
-13
lines changed

3 files changed

+16
-13
lines changed

pubspec.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ description: Free video dictionary of Auslan signs
33

44
publish_to: 'none'
55

6-
version: 1.19.4+822
6+
version: 1.19.5+823
77

88
environment:
99
sdk: '>=2.18.0 <3.0.0'

scripts/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ For running locally with manual PR creation:
3030
cd scripts
3131

3232
# Step 1: Run the full scrape
33-
uv run ./scrape.sh --validate
33+
uv run ./scrape.sh --validate |& tee ~/run-out.log
3434

3535
# Step 2: Review the output
3636
# Check all_letters.json looks correct

scripts/common.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,34 +40,37 @@ def _rate_limit():
4040
)
4141
def _check_video_url_request(url: str, timeout: int) -> int:
4242
"""
43-
Make an OPTIONS request to check if a video URL exists.
43+
Make a GET request with Range header to check if a video URL exists.
44+
Only requests the first byte to minimize bandwidth.
4445
Returns the status code. Retries on network errors and unexpected status codes.
45-
Only 200 and 404 are considered final responses; other codes trigger a retry.
46+
200/206 = exists, 404 = doesn't exist. Other codes trigger a retry.
4647
"""
47-
LOG.debug(f"Checking video URL with OPTIONS: {url}")
48+
LOG.debug(f"Checking video URL with Range GET: {url}")
4849
_rate_limit()
49-
response = requests.options(url, timeout=timeout)
50+
headers = {"Range": "bytes=0-0"}
51+
response = requests.get(url, headers=headers, timeout=timeout)
5052
status_code = response.status_code
51-
# 200 = exists, 404 = doesn't exist. Both are valid final states.
53+
# 200/206 = exists (206 is partial content for range request).
54+
# 404 = doesn't exist. Both are valid final states.
5255
# Any other status code should trigger a retry.
53-
if status_code not in (200, 404):
56+
if status_code not in (200, 206, 404):
5457
raise RuntimeError(f"Got unexpected status code {status_code} for {url}")
5558
return status_code
5659

5760

5861
def check_video_url_exists(url: str, timeout: int = 30) -> bool:
5962
"""
60-
Check if a video URL is valid by making an OPTIONS request.
61-
Returns True if the URL returns 200, False if 404.
63+
Check if a video URL is valid by making a GET request with Range header.
64+
Returns True if the URL returns 200/206, False if 404.
6265
Retries with exponential backoff on network errors or unexpected status codes.
6366
Raises an exception if retries are exhausted for non-404 errors.
6467
"""
6568
status_code = _check_video_url_request(url, timeout)
66-
if status_code == 200:
69+
if status_code in (200, 206):
6770
return True
6871
else:
69-
# Must be 404 since _check_video_url_request only returns 200 or 404.
70-
LOG.info(f"Video URL returned 404, skipping: {url}")
72+
# Must be 404 since _check_video_url_request only returns 200, 206, or 404.
73+
LOG.warning(f"Video URL returned 404, skipping: {url}")
7174
return False
7275

7376

0 commit comments

Comments
 (0)