Skip to content

Commit 00efec1

Browse files
committed
fixup! fix(download): verify etags for resumed requests
Signed-off-by: Marcel Guzik <[email protected]>
1 parent 8ce0723 commit 00efec1

File tree

3 files changed

+118
-87
lines changed

3 files changed

+118
-87
lines changed

crates/common/download/src/download.rs

Lines changed: 18 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
pub mod partial_response;
2+
use crate::download::partial_response::PartialResponse;
23
use crate::error::DownloadError;
34
use crate::error::ErrContext;
45
use anyhow::anyhow;
56
use backoff::future::retry_notify;
67
use backoff::ExponentialBackoff;
78
use certificate::CloudHttpConfig;
8-
use hyper::header;
99
use log::debug;
1010
use log::info;
1111
use log::warn;
@@ -198,14 +198,17 @@ impl Downloader {
198198
mut prev_response: Response,
199199
) -> Result<(), DownloadError> {
200200
loop {
201-
let offset = next_request_offset(&prev_response, file);
202-
let mut response = self.request_range_from(url, offset).await?;
203-
if was_resource_modified(&response, &prev_response) {
204-
file.seek(SeekFrom::Start(0)).unwrap();
205-
prev_response = response;
206-
continue;
207-
}
208-
let offset = partial_response::response_range_start(&response)?;
201+
let request_offset = next_request_offset(&prev_response, file)?;
202+
let mut response = self.request_range_from(url, request_offset).await?;
203+
let offset = match partial_response::response_range_start(&response, &prev_response)? {
204+
PartialResponse::CompleteContent => 0,
205+
PartialResponse::PartialContent(pos) => pos,
206+
PartialResponse::ResourceModified => {
207+
file.seek(SeekFrom::Start(0))
208+
.context("failed to seek in file".to_string())?;
209+
continue;
210+
}
211+
};
209212

210213
if offset != 0 {
211214
info!("Resuming file download at position={offset}");
@@ -357,46 +360,6 @@ impl Downloader {
357360
}
358361
}
359362

360-
/// Checks if the resource was modified between the current and previous response.
361-
///
362-
/// If the resource was updated, we should restart download and request full range of the new
363-
/// resource. Otherwise, a partial request can be used to resume the download.
364-
fn was_resource_modified(response: &Response, prev_response: &Response) -> bool {
365-
// etags in current and previous request must match
366-
let etag = response
367-
.headers()
368-
.get(header::ETAG)
369-
.and_then(|h| h.to_str().ok());
370-
let prev_etag = prev_response
371-
.headers()
372-
.get(header::ETAG)
373-
.and_then(|h| h.to_str().ok());
374-
375-
match (etag, prev_etag) {
376-
(None, None) => {
377-
// no etags in either request, assume resource is unchanged
378-
false
379-
}
380-
(None, Some(_)) | (Some(_), None) => {
381-
// previous request didn't have etag and this does or vice versa, abort
382-
true
383-
}
384-
(Some(etag), Some(prev_etag)) => {
385-
// Examples:
386-
// ETag: "xyzzy"
387-
// ETag: W/"xyzzy"
388-
// ETag: ""
389-
if etag.starts_with("W/") {
390-
// validator is weak, but in range requests tags must match using strong comparison
391-
// https://www.rfc-editor.org/rfc/rfc9110#entity.tag.comparison
392-
return true;
393-
}
394-
395-
etag != prev_etag
396-
}
397-
}
398-
}
399-
400363
/// Decides whether HTTP request error is retryable.
401364
fn reqwest_err_to_backoff(err: reqwest::Error) -> backoff::Error<reqwest::Error> {
402365
if err.is_timeout() || err.is_connect() {
@@ -469,20 +432,22 @@ pub fn try_pre_allocate_space(
469432
Ok(())
470433
}
471434

472-
fn next_request_offset(prev_response: &Response, file: &mut File) -> u64 {
435+
fn next_request_offset(prev_response: &Response, file: &mut File) -> Result<u64, DownloadError> {
473436
use hyper::header;
474437
use hyper::StatusCode;
475438
use std::io::Seek;
476-
let pos = file.stream_position().unwrap();
439+
let pos = file
440+
.stream_position()
441+
.context("failed to get cursor position".to_string())?;
477442
if prev_response.status() == StatusCode::PARTIAL_CONTENT
478443
|| prev_response
479444
.headers()
480445
.get(header::ACCEPT_RANGES)
481446
.is_some_and(|unit| unit == "bytes")
482447
{
483-
pos
448+
Ok(pos)
484449
} else {
485-
0
450+
Ok(0)
486451
}
487452
}
488453

crates/common/download/src/download/partial_response.rs

Lines changed: 100 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,19 @@
11
//! Utilities related to parsing responses to partial requests.
22
3-
use reqwest::header;
43
use reqwest::header::HeaderValue;
54
use reqwest::StatusCode;
5+
use reqwest::{header, Response};
6+
7+
pub(super) enum PartialResponse {
8+
/// Server returned a partial content response starting at given position
9+
PartialContent(u64),
10+
11+
/// Server returned regular OK response, resume writing at 0
12+
CompleteContent,
13+
14+
/// Server returned partial content but resource was modified, request needs to be retried
15+
ResourceModified,
16+
}
617

718
/// Returns the position of the partial response in the resource.
819
///
@@ -11,26 +22,77 @@ use reqwest::StatusCode;
1122
/// Content-Range header. The server could also just ignore the Range header and
1223
/// respond with 200 OK, in which case we need to download the entire resource
1324
/// all over again.
14-
pub fn response_range_start(response: &reqwest::Response) -> Result<u64, InvalidResponseError> {
15-
let chunk_pos = match response.status() {
25+
pub(super) fn response_range_start(
26+
response: &reqwest::Response,
27+
prev_response: &Response,
28+
) -> Result<PartialResponse, InvalidResponseError> {
29+
match response.status() {
1630
// Complete response, seek to the beginning of the file
17-
StatusCode::OK => 0,
31+
StatusCode::OK => Ok(PartialResponse::CompleteContent),
1832

1933
// Partial response, the range might be different from what we
2034
// requested, so we need to parse it. Because we only request a single
2135
// range from the current position to the end of the document, we can
2236
// ignore multipart/byteranges media type.
23-
StatusCode::PARTIAL_CONTENT => partial_response_start_range(response)?,
37+
StatusCode::PARTIAL_CONTENT => {
38+
if was_resource_modified(response, prev_response) {
39+
return Ok(PartialResponse::ResourceModified);
40+
}
41+
let pos = partial_response_start_range(response)?;
42+
Ok(PartialResponse::PartialContent(pos))
43+
}
2444

2545
// We don't expect to receive any other 200-299 status code, but if we
2646
// do, treat it the same as OK
27-
status_code if status_code.is_success() => 0,
47+
status_code if status_code.is_success() => Ok(PartialResponse::CompleteContent),
48+
49+
status_code => Err(InvalidResponseError::UnexpectedStatus(status_code)),
50+
}
51+
}
52+
53+
/// Checks if the resource was modified between the current and previous response.
54+
///
55+
/// If the resource was updated, we should restart download and request full range of the new
56+
/// resource. Otherwise, a partial request can be used to resume the download.
57+
fn was_resource_modified(response: &Response, prev_response: &Response) -> bool {
58+
if response.status() != hyper::StatusCode::PARTIAL_CONTENT {
59+
// not using a partial request, don't care if it's modified or not
60+
return false;
61+
}
62+
63+
// etags in current and previous request must match
64+
let etag = response
65+
.headers()
66+
.get(header::ETAG)
67+
.and_then(|h| h.to_str().ok());
68+
let prev_etag = prev_response
69+
.headers()
70+
.get(header::ETAG)
71+
.and_then(|h| h.to_str().ok());
2872

29-
status_code => {
30-
return Err(InvalidResponseError::UnexpectedStatus(status_code));
73+
match (etag, prev_etag) {
74+
(None, None) => {
75+
// no etags in either request, assume resource is unchanged
76+
false
3177
}
32-
};
33-
Ok(chunk_pos)
78+
(None, Some(_)) | (Some(_), None) => {
79+
// previous request didn't have etag and this does or vice versa, abort
80+
true
81+
}
82+
(Some(etag), Some(prev_etag)) => {
83+
// Examples:
84+
// ETag: "xyzzy"
85+
// ETag: W/"xyzzy"
86+
// ETag: ""
87+
if etag.starts_with("W/") {
88+
// validator is weak, but in range requests tags must match using strong comparison
89+
// https://www.rfc-editor.org/rfc/rfc9110#entity.tag.comparison
90+
return true;
91+
}
92+
93+
etag != prev_etag
94+
}
95+
}
3496
}
3597

3698
#[derive(Debug, thiserror::Error)]
@@ -88,3 +150,31 @@ pub struct ContentRangeParseError {
88150
reason: &'static str,
89151
value: header::HeaderValue,
90152
}
153+
154+
#[cfg(test)]
155+
mod tests {
156+
use super::*;
157+
158+
#[test_case::test_case(Some(r#""xyzzy""#), Some(r#""xyzzy""#), false)]
159+
#[test_case::test_case(Some(r#"W/"xyzzy""#), Some(r#""xyzzy""#), true)]
160+
#[test_case::test_case(Some(r#""xyzzy""#), Some(r#"W/"xyzzy""#), true)]
161+
#[test_case::test_case(Some(r#""xyzzy1""#), Some(r#""xyzzy2""#), true)]
162+
#[test_case::test_case(None, None, false)]
163+
#[test_case::test_case(Some(r#""xyzzy1""#), None, true)]
164+
#[test_case::test_case(None, Some(r#""xyzzy2""#), true)]
165+
fn verifies_etags(etag1: Option<&'static str>, etag2: Option<&'static str>, modified: bool) {
166+
let mut response1 = http::Response::builder().status(StatusCode::PARTIAL_CONTENT);
167+
if let Some(etag) = etag1 {
168+
response1 = response1.header(http::header::ETAG, etag);
169+
}
170+
let response1 = response1.body("").unwrap().into();
171+
172+
let mut response2 = http::Response::builder().status(StatusCode::PARTIAL_CONTENT);
173+
if let Some(etag) = etag2 {
174+
response2 = response2.header(http::header::ETAG, etag);
175+
}
176+
let response2 = response2.body("").unwrap().into();
177+
178+
assert_eq!(was_resource_modified(&response1, &response2), modified);
179+
}
180+
}

crates/common/download/src/download/tests.rs

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use super::*;
22
use axum::Router;
3-
use http::StatusCode;
43
use hyper::header::AUTHORIZATION;
54
use rustls::pki_types::pem::PemObject;
65
use rustls::pki_types::PrivateKeyDer;
@@ -434,29 +433,6 @@ async fn downloader_error_shows_certificate_required_error_when_appropriate() {
434433
assert!(dbg!(format!("{err:#}")).contains("received fatal alert: CertificateRequired"));
435434
}
436435

437-
#[test_case::test_case(Some(r#""xyzzy""#), Some(r#""xyzzy""#), false)]
438-
#[test_case::test_case(Some(r#"W/"xyzzy""#), Some(r#""xyzzy""#), true)]
439-
#[test_case::test_case(Some(r#""xyzzy""#), Some(r#"W/"xyzzy""#), true)]
440-
#[test_case::test_case(Some(r#""xyzzy1""#), Some(r#""xyzzy2""#), true)]
441-
#[test_case::test_case(None, None, false)]
442-
#[test_case::test_case(Some(r#""xyzzy1""#), None, true)]
443-
#[test_case::test_case(None, Some(r#""xyzzy2""#), true)]
444-
fn verifies_etags(etag1: Option<&'static str>, etag2: Option<&'static str>, modified: bool) {
445-
let mut response1 = http::Response::builder().status(StatusCode::PARTIAL_CONTENT);
446-
if let Some(etag) = etag1 {
447-
response1 = response1.header(http::header::ETAG, etag);
448-
}
449-
let response1 = response1.body("").unwrap().into();
450-
451-
let mut response2 = http::Response::builder().status(StatusCode::PARTIAL_CONTENT);
452-
if let Some(etag) = etag2 {
453-
response2 = response2.header(http::header::ETAG, etag);
454-
}
455-
let response2 = response2.body("").unwrap().into();
456-
457-
assert_eq!(was_resource_modified(&response1, &response2), modified);
458-
}
459-
460436
fn create_file_with_size(size: usize) -> Result<NamedTempFile, anyhow::Error> {
461437
let mut file = NamedTempFile::new().unwrap();
462438
let data: String = "Some data!".into();

0 commit comments

Comments
 (0)