Skip to content

Commit 584eee8

Browse files
committed
fix(hasher): prevent double-download when syncing with auto_size
When syncing from hasher with auto_size larger than file size, files were downloaded twice: once by fs.Fingerprint() calling Hash() which triggered updateHashes(), and again for the actual transfer.

Root cause: checkPartial() calls Fingerprint(ctx, src, fast=true) for generating stable partial upload suffixes. Fingerprint checks SlowHash before calling Hash(), but hasher didn't set SlowHash even when computing hashes requires downloading the entire file.

Fix:
- Set SlowHash=true when passHashes is empty (no fast pass-through hashes from base). Must be set AFTER Mask() to avoid being ANDed to false.
- Add singleflight to deduplicate concurrent updateHashes() calls for the same file, preventing redundant downloads from parallel Hash() requests.

When passHashes is non-empty (e.g., S3 with MD5), SlowHash remains false and Fingerprint uses the fast pass-through hash. When passHashes is empty (e.g., crypt/SFTP base), Fingerprint skips hashing entirely. Explicit rclone hashsum commands still work as they call Hash() directly.

Signed-off-by: Anagh Kumar Baranwal <[email protected]>
1 parent af0d488 commit 584eee8

File tree

3 files changed

+26
-1
lines changed

3 files changed

+26
-1
lines changed

backend/hasher/hasher.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ import (
2020
"github.com/rclone/rclone/fs/hash"
2121
"github.com/rclone/rclone/fs/list"
2222
"github.com/rclone/rclone/lib/kv"
23+
"golang.org/x/sync/singleflight"
2324
)
2425

2526
// Register with Fs
@@ -81,6 +82,8 @@ type Fs struct {
8182
slowHashes hash.Set // passed to the base and then cached
8283
autoHashes hash.Set // calculated in-house and cached
8384
keepHashes hash.Set // checksums to keep in cache (slow + auto)
85+
// concurrency control
86+
hashGroup singleflight.Group // deduplicates concurrent updateHashes calls
8487
}
8588

8689
var warnExperimental sync.Once
@@ -183,6 +186,11 @@ func NewFs(ctx context.Context, fsname, rpath string, cmap configmap.Mapper) (fs
183186
}
184187
f.features = stubFeatures.Fill(ctx, f).Mask(ctx, f.Fs).WrapsFs(f, f.Fs)
185188

189+
// Set SlowHash based on whether we have fast pass-through hashes.
190+
// Must be set AFTER Mask() to avoid being ANDed to false.
191+
// When passHashes is empty, computing hashes requires downloading the file.
192+
f.features.SlowHash = f.passHashes.Count() == 0
193+
186194
// Enable ListP always
187195
f.features.ListP = f.ListP
188196

backend/hasher/hasher_internal_test.go

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"os"
7+
"strings"
78
"testing"
89

910
"github.com/rclone/rclone/fs"
@@ -59,6 +60,19 @@ func (f *Fs) testUploadFromCrypt(t *testing.T) {
5960
require.NoError(t, err)
6061
assert.NotNil(t, dst)
6162

63+
// Verify fingerprint respects SlowHash (tests the double-download fix).
64+
// With SlowHash=true and fast=true, fingerprint should skip hash computation.
65+
if f.Features().SlowHash {
66+
hobj, err := f.NewObject(ctx, fileName)
67+
require.NoError(t, err)
68+
fp := fs.Fingerprint(ctx, hobj, true)
69+
// Fingerprint format is "size,modtime,hash" - with SlowHash=true and fast=true,
70+
// hash should be omitted, so we expect at most 2 comma-separated parts.
71+
parts := strings.Split(fp, ",")
72+
assert.LessOrEqual(t, len(parts), 2,
73+
"fast fingerprint should skip hash when SlowHash=true, got: %s", fp)
74+
}
75+
6276
// check that hash was created
6377
if f.opt.MaxAge > 0 {
6478
hash, err = f.getRawHash(ctx, hashType, fileName, anyFingerprint, longTime)

backend/hasher/object.go

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,10 @@ func (o *Object) Hash(ctx context.Context, hashType hash.Type) (hashVal string,
105105
return hashVal, err
106106
}
107107
if f.autoHashes.Contains(hashType) && o.Size() < int64(f.opt.AutoSize) {
108-
_ = o.updateHashes(ctx)
108+
// Use singleflight to prevent concurrent downloads for same file
109+
_, _, _ = f.hashGroup.Do(o.Remote(), func() (interface{}, error) {
110+
return nil, o.updateHashes(ctx)
111+
})
109112
if hashVal, err = o.getHash(ctx, hashType); err != nil {
110113
fs.Debugf(o, "auto %s = %q (%v)", hashType, hashVal, err)
111114
err = nil

0 commit comments

Comments
 (0)