Skip to content

Commit 0d90093

Browse files
committed
don't attempt range request without object size
The range logic was being erroneously applied when there was no Content-Length returned by the HEAD call. Clone the http header when copying them to new requests, so that changes are not inadvertently reflected in the caller. The GetFile method still has the unfortunate behavior of not verifying the destination file for the range request (which it currently can't, because there is no metadata to compare). This means that a call to replace an existing file when the server supports byte-range, will either succeed if the new length is <= the previous, or corrupt the file by appending new bytes. Silent corruption can be avoided by added a `checksum` parameter to the go-getter request, but will still result in failure to download. Update the HttpGetter.GetFile method docs to reflect this behavior. In most cases however the caller is not looking at the HttpGetter itself, as go-getter is attempting to abstract this away. This means there needs to be global support in the Client to make this a viable option. How to add this can be decided separate from this fix.
1 parent d330e2a commit 0d90093

File tree

2 files changed

+79
-10
lines changed

2 files changed

+79
-10
lines changed

get_http.go

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99
"net/url"
1010
"os"
1111
"path/filepath"
12-
"strconv"
1312
"strings"
1413

1514
safetemp "github.com/hashicorp/go-safetemp"
@@ -89,7 +88,7 @@ func (g *HttpGetter) Get(dst string, u *url.URL) error {
8988
}
9089

9190
if g.Header != nil {
92-
req.Header = g.Header
91+
req.Header = g.Header.Clone()
9392
}
9493

9594
resp, err := g.Client.Do(req)
@@ -131,6 +130,12 @@ func (g *HttpGetter) Get(dst string, u *url.URL) error {
131130
return g.getSubdir(ctx, dst, source, subDir)
132131
}
133132

133+
// GetFile fetches the file from src and stores it at dst.
134+
// If the server supports Accept-Range, HttpGetter will attempt a range
135+
// request. This means it is the caller's responsibility to ensure that an
136+
// older version of the destination file does not exist, else it will be either
137+
// falsely identified as being replaced, or corrupted with extra bytes
138+
// appended.
134139
func (g *HttpGetter) GetFile(dst string, src *url.URL) error {
135140
ctx := g.Context()
136141
if g.Netrc {
@@ -139,7 +144,6 @@ func (g *HttpGetter) GetFile(dst string, src *url.URL) error {
139144
return err
140145
}
141146
}
142-
143147
// Create all the parent directories if needed
144148
if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
145149
return err
@@ -165,21 +169,20 @@ func (g *HttpGetter) GetFile(dst string, src *url.URL) error {
165169
return err
166170
}
167171
if g.Header != nil {
168-
req.Header = g.Header
172+
req.Header = g.Header.Clone()
169173
}
170174
headResp, err := g.Client.Do(req)
171-
if err == nil && headResp != nil {
175+
if err == nil {
172176
headResp.Body.Close()
173177
if headResp.StatusCode == 200 {
174178
// If the HEAD request succeeded, then attempt to set the range
175179
// query if we can.
176-
if headResp.Header.Get("Accept-Ranges") == "bytes" {
180+
if headResp.Header.Get("Accept-Ranges") == "bytes" && headResp.ContentLength >= 0 {
177181
if fi, err := f.Stat(); err == nil {
178-
if _, err = f.Seek(0, os.SEEK_END); err == nil {
179-
req.Header.Set("Range", fmt.Sprintf("bytes=%d-", fi.Size()))
182+
if _, err = f.Seek(0, io.SeekEnd); err == nil {
180183
currentFileSize = fi.Size()
181-
totalFileSize, _ := strconv.ParseInt(headResp.Header.Get("Content-Length"), 10, 64)
182-
if currentFileSize >= totalFileSize {
184+
req.Header.Set("Range", fmt.Sprintf("bytes=%d-", currentFileSize))
185+
if currentFileSize >= headResp.ContentLength {
183186
// file already present
184187
return nil
185188
}

get_http_test.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,57 @@ func TestHttpGetter_resume(t *testing.T) {
222222
}
223223
}
224224

225+
// The server may support Byte-Range, but has no size for the requested object
226+
func TestHttpGetter_resumeNoRange(t *testing.T) {
227+
load := []byte(testHttpMetaStr)
228+
sha := sha256.New()
229+
if n, err := sha.Write(load); n != len(load) || err != nil {
230+
t.Fatalf("sha write failed: %d, %s", n, err)
231+
}
232+
checksum := hex.EncodeToString(sha.Sum(nil))
233+
downloadFrom := len(load) / 2
234+
235+
ln := testHttpServer(t)
236+
defer ln.Close()
237+
238+
dst := tempDir(t)
239+
defer os.RemoveAll(dst)
240+
241+
dst = filepath.Join(dst, "..", "range")
242+
f, err := os.Create(dst)
243+
if err != nil {
244+
t.Fatalf("create: %v", err)
245+
}
246+
if n, err := f.Write(load[:downloadFrom]); n != downloadFrom || err != nil {
247+
t.Fatalf("partial file write failed: %d, %s", n, err)
248+
}
249+
if err := f.Close(); err != nil {
250+
t.Fatalf("close failed: %s", err)
251+
}
252+
253+
u := url.URL{
254+
Scheme: "http",
255+
Host: ln.Addr().String(),
256+
Path: "/no-range",
257+
RawQuery: "checksum=" + checksum,
258+
}
259+
t.Logf("url: %s", u.String())
260+
261+
// Finish getting it!
262+
if err := GetFile(dst, u.String()); err != nil {
263+
t.Fatalf("finishing download should not error: %v", err)
264+
}
265+
266+
b, err := ioutil.ReadFile(dst)
267+
if err != nil {
268+
t.Fatalf("readfile failed: %v", err)
269+
}
270+
271+
if string(b) != string(load) {
272+
t.Fatalf("file differs: got:\n%s\n expected:\n%s\n", string(b), string(load))
273+
}
274+
}
275+
225276
func TestHttpGetter_file(t *testing.T) {
226277
ln := testHttpServer(t)
227278
defer ln.Close()
@@ -351,6 +402,7 @@ func testHttpServer(t *testing.T) net.Listener {
351402
mux.HandleFunc("/meta-subdir", testHttpHandlerMetaSubdir)
352403
mux.HandleFunc("/meta-subdir-glob", testHttpHandlerMetaSubdirGlob)
353404
mux.HandleFunc("/range", testHttpHandlerRange)
405+
mux.HandleFunc("/no-range", testHttpHandlerNoRange)
354406

355407
var server http.Server
356408
server.Handler = mux
@@ -428,6 +480,20 @@ func testHttpHandlerRange(w http.ResponseWriter, r *http.Request) {
428480
}
429481
}
430482

483+
func testHttpHandlerNoRange(w http.ResponseWriter, r *http.Request) {
484+
load := []byte(testHttpMetaStr)
485+
switch r.Method {
486+
case "HEAD":
487+
// we support range, but the object size isn't known
488+
w.Header().Add("accept-ranges", "bytes")
489+
default:
490+
if r.Header.Get("Range") != "" {
491+
http.Error(w, "range not supported", http.StatusBadRequest)
492+
}
493+
w.Write(load)
494+
}
495+
}
496+
431497
const testHttpMetaStr = `
432498
<html>
433499
<head>

0 commit comments

Comments
 (0)