Skip to content

Commit f2fb061

Browse files
committed
Fix parallel downloads of the same image
When starting multiple vms in parallel using the same uncached yet image, we downloaded the data to the same temporary file (data.tmp) and wrote metadata to the same files (url, time, type, {digest}.digest). We could fail in many ways: - One process removes the cache directory when another tries to rename a temporary file to the destination file. - One process fails to remove the cache directory since another process started to download and the directory is not empty. - Corrupting data when writing to the same file from multiple processes. Mostly likely when writing downloaded data. Fixes: - Do not remove the cache directory before the download. This avoids the failing to remove non-empty directory, and failure to rename a temporary file. - Write all data to temporary files, replacing the destination file when the write is complete. This avoid corrupted data. - Do not remove destination file when creating a new temporary file or renaming since creating a file truncate existing file and renaming replace it. Issues: - On windows os.Rename is not atomic, but it should work when using NTFS. - On failures we may have lefover temporary files ({name}.tmp.{pid}). Fixes #2722 Signed-off-by: Nir Soffer <[email protected]>
1 parent 5a98e62 commit f2fb061

File tree

1 file changed

+43
-13
lines changed

1 file changed

+43
-13
lines changed

pkg/downloader/downloader.go

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"os/exec"
1313
"path"
1414
"path/filepath"
15+
"strconv"
1516
"strings"
1617
"time"
1718

@@ -262,14 +263,11 @@ func Download(ctx context.Context, local, remote string, opts ...Opt) (*Result,
262263
return res, nil
263264
}
264265
}
265-
if err := os.RemoveAll(shad); err != nil {
266-
return nil, err
267-
}
268266
if err := os.MkdirAll(shad, 0o700); err != nil {
269267
return nil, err
270268
}
271269
shadURL := filepath.Join(shad, "url")
272-
if err := os.WriteFile(shadURL, []byte(remote), 0o644); err != nil {
270+
if err := atomicWrite(shadURL, []byte(remote), 0o644); err != nil {
273271
return nil, err
274272
}
275273
if err := downloadHTTP(ctx, shadData, shadTime, shadType, remote, o.description, o.expectedDigest); err != nil {
@@ -280,7 +278,7 @@ func Download(ctx context.Context, local, remote string, opts ...Opt) (*Result,
280278
return nil, err
281279
}
282280
if shadDigest != "" && o.expectedDigest != "" {
283-
if err := os.WriteFile(shadDigest, []byte(o.expectedDigest.String()), 0o644); err != nil {
281+
if err := atomicWrite(shadDigest, []byte(o.expectedDigest.String()), 0o644); err != nil {
284282
return nil, err
285283
}
286284
}
@@ -600,10 +598,8 @@ func downloadHTTP(ctx context.Context, localPath, lastModified, contentType, url
600598
return fmt.Errorf("downloadHTTP: got empty localPath")
601599
}
602600
logrus.Debugf("downloading %q into %q", url, localPath)
603-
localPathTmp := localPath + ".tmp"
604-
if err := os.RemoveAll(localPathTmp); err != nil {
605-
return err
606-
}
601+
602+
localPathTmp := perProcessTempfile(localPath)
607603
fileWriter, err := os.Create(localPathTmp)
608604
if err != nil {
609605
return err
@@ -616,13 +612,13 @@ func downloadHTTP(ctx context.Context, localPath, lastModified, contentType, url
616612
}
617613
if lastModified != "" {
618614
lm := resp.Header.Get("Last-Modified")
619-
if err := os.WriteFile(lastModified, []byte(lm), 0o644); err != nil {
615+
if err := atomicWrite(lastModified, []byte(lm), 0o644); err != nil {
620616
return err
621617
}
622618
}
623619
if contentType != "" {
624620
ct := resp.Header.Get("Content-Type")
625-
if err := os.WriteFile(contentType, []byte(ct), 0o644); err != nil {
621+
if err := atomicWrite(contentType, []byte(ct), 0o644); err != nil {
626622
return err
627623
}
628624
}
@@ -674,10 +670,44 @@ func downloadHTTP(ctx context.Context, localPath, lastModified, contentType, url
674670
if err := fileWriter.Close(); err != nil {
675671
return err
676672
}
677-
if err := os.RemoveAll(localPath); err != nil {
673+
674+
return os.Rename(localPathTmp, localPath)
675+
}
676+
677+
// To allow parallel download we use a per-process unique suffix for tempoary
678+
// files. Renaming the temporary file to the final file is safe without
679+
// synchronization on posix.
680+
// https://github.com/lima-vm/lima/issues/2722
681+
func perProcessTempfile(path string) string {
682+
return path + ".tmp." + strconv.FormatInt(int64(os.Getpid()), 10)
683+
}
684+
685+
// atomicWrite writes data to path, creating a new file or replacing existing
686+
// one. Multiple processess can write to the same path safely. Safe on posix and
687+
// likely safe on windows when using NTFS.
688+
func atomicWrite(path string, data []byte, perm os.FileMode) error {
689+
tmpPath := perProcessTempfile(path)
690+
tmp, err := os.OpenFile(tmpPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
691+
if err != nil {
692+
return err
693+
}
694+
defer func() {
695+
if err != nil {
696+
tmp.Close()
697+
os.RemoveAll(tmpPath)
698+
}
699+
}()
700+
if _, err = tmp.Write(data); err != nil {
678701
return err
679702
}
680-
return os.Rename(localPathTmp, localPath)
703+
if err = tmp.Sync(); err != nil {
704+
return err
705+
}
706+
if err = tmp.Close(); err != nil {
707+
return err
708+
}
709+
err = os.Rename(tmpPath, path)
710+
return err
681711
}
682712

683713
// CacheEntries returns a map of cache entries.

0 commit comments

Comments
 (0)