Skip to content

Commit abc060e

Browse files
committed
perf: only copy files from cache when required
1 parent 841bacd commit abc060e

File tree

2 files changed

+177
-2
lines changed

2 files changed

+177
-2
lines changed

internal/cache/artifacts.go

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
package cache
1919

2020
import (
21+
"bytes"
22+
"crypto/sha256"
2123
"fmt"
2224
"io"
2325
"os"
@@ -35,7 +37,8 @@ func CopyArtifacts(baseDir, destDir string, outputs []string) error {
3537
src := filepath.Join(baseDir, output)
3638
dst := filepath.Join(destDir, output)
3739

38-
if err := copyFile(src, dst); err != nil {
40+
// Only copy if file doesn't exist or differs (optimization for re-caching)
41+
if _, err := copyFileIfNeeded(src, dst); err != nil {
3942
return fmt.Errorf("failed to copy %s: %w", output, err)
4043
}
4144
}
@@ -55,7 +58,8 @@ func RestoreArtifacts(cacheDir, destDir string, outputs []string) error {
5558
return fmt.Errorf("failed to create output directory: %w", err)
5659
}
5760

58-
if err := copyFile(src, dst); err != nil {
61+
// Only copy if file doesn't exist or differs
62+
if _, err := copyFileIfNeeded(src, dst); err != nil {
5963
return fmt.Errorf("failed to restore %s: %w", output, err)
6064
}
6165
}
@@ -250,3 +254,77 @@ func copyFile(src, dst string) error {
250254

251255
return os.Chmod(dst, srcInfo.Mode())
252256
}
257+
258+
// filesAreIdentical reports whether file1 and file2 are both regular files
// holding exactly the same bytes.
//
// Any failure along the way (a path is missing, cannot be stat'ed, opened or
// read) yields false, so a caller that copies on "not identical" falls back to
// a real copy. Only content is compared; permission bits and timestamps are
// ignored.
func filesAreIdentical(file1, file2 string) bool {
	info1, err := os.Stat(file1)
	if err != nil {
		return false
	}
	info2, err := os.Stat(file2)
	if err != nil {
		return false
	}

	// Size is only meaningful for regular files (it is system-dependent for
	// anything else), so refuse to compare directories and other special files.
	if !info1.Mode().IsRegular() || !info2.Mode().IsRegular() {
		return false
	}

	// Both paths resolve to the same underlying file: nothing to compare.
	if os.SameFile(info1, info2) {
		return true
	}

	// Cheap rejections before touching any content.
	if info1.Size() != info2.Size() {
		return false
	}
	if info1.Size() == 0 {
		return true
	}

	f1, err := os.Open(file1)
	if err != nil {
		return false
	}
	defer f1.Close()

	f2, err := os.Open(file2)
	if err != nil {
		return false
	}
	defer f2.Close()

	// Stream both files in lock-step. This keeps memory use constant, stops at
	// the first differing chunk, and avoids hashing either file.
	const chunkSize = 64 * 1024
	buf1 := make([]byte, chunkSize)
	buf2 := make([]byte, chunkSize)

	for {
		n1, err1 := io.ReadFull(f1, buf1)
		n2, err2 := io.ReadFull(f2, buf2)

		// bytes.Equal also fails on a length mismatch, which covers a file
		// that changed size after the Stat calls above.
		if !bytes.Equal(buf1[:n1], buf2[:n2]) {
			return false
		}

		// io.ReadFull signals end of input with io.EOF (nothing read) or
		// io.ErrUnexpectedEOF (short final chunk); anything else is a real
		// read failure.
		done1 := err1 == io.EOF || err1 == io.ErrUnexpectedEOF
		done2 := err2 == io.EOF || err2 == io.ErrUnexpectedEOF
		if (err1 != nil && !done1) || (err2 != nil && !done2) {
			return false
		}
		if done1 || done2 {
			return done1 && done2
		}
	}
}
299+
300+
// hashFile streams the file at path through SHA-256 and returns the raw
// digest bytes. It returns a nil digest and the underlying error when the
// file cannot be opened or read.
func hashFile(path string) ([]byte, error) {
	f, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer f.Close()

	digest := sha256.New()
	_, copyErr := io.Copy(digest, f)
	if copyErr != nil {
		return nil, copyErr
	}

	sum := digest.Sum(nil)
	return sum, nil
}
315+
316+
// copyFileIfNeeded copies a file only if destination doesn't exist or differs from source
317+
// Returns true if file was copied, false if copy was skipped
318+
func copyFileIfNeeded(src, dst string) (bool, error) {
319+
// Check if files are already identical
320+
if filesAreIdentical(src, dst) {
321+
return false, nil // Skip copy
322+
}
323+
324+
// Files differ or destination doesn't exist, perform copy
325+
if err := copyFile(src, dst); err != nil {
326+
return false, err
327+
}
328+
329+
return true, nil
330+
}

internal/cache/cache_test.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"os"
66
"path/filepath"
77
"testing"
8+
"time"
89

910
"github.com/Norgate-AV/spc/internal/config"
1011
"github.com/stretchr/testify/assert"
@@ -867,3 +868,99 @@ func TestCache_UshFiles_TargetSpecific(t *testing.T) {
867868
}
868869
}
869870
}
871+
872+
// TestCache_Restore_SkipsIdenticalFiles verifies that restoration only copies files
873+
// when necessary, skipping identical files to improve performance
874+
func TestCache_Restore_SkipsIdenticalFiles(t *testing.T) {
875+
cacheDir := t.TempDir()
876+
sourceDir := t.TempDir()
877+
sourceFile := filepath.Join(sourceDir, "test.usp")
878+
splsWorkDir := filepath.Join(sourceDir, "SPlsWork")
879+
880+
// Create source file
881+
err := os.WriteFile(sourceFile, []byte("test source"), 0o644)
882+
require.NoError(t, err)
883+
884+
// Create SPlsWork directory
885+
err = os.MkdirAll(splsWorkDir, 0o755)
886+
require.NoError(t, err)
887+
888+
// Create output files
889+
outputs := []string{"test.dll", "test.cs"}
890+
for _, output := range outputs {
891+
path := filepath.Join(splsWorkDir, output)
892+
err := os.WriteFile(path, []byte(fmt.Sprintf("content of %s", output)), 0o644)
893+
require.NoError(t, err)
894+
}
895+
896+
// Create .ush file
897+
ushFile := filepath.Join(sourceDir, "test.ush")
898+
err = os.WriteFile(ushFile, []byte("header content"), 0o644)
899+
require.NoError(t, err)
900+
901+
// Create cache and store
902+
cache, err := New(cacheDir)
903+
require.NoError(t, err)
904+
defer cache.Close()
905+
906+
cfg := &config.Config{Target: "3", UserFolders: []string{}}
907+
err = cache.Store(sourceFile, cfg, true)
908+
require.NoError(t, err)
909+
910+
// Get entry
911+
entry, err := cache.Get(sourceFile, cfg)
912+
require.NoError(t, err)
913+
require.NotNil(t, entry)
914+
915+
// First restoration (files don't exist) - should copy all files
916+
restoreDir1 := t.TempDir()
917+
err = cache.Restore(entry, restoreDir1)
918+
require.NoError(t, err)
919+
920+
// Verify files were created
921+
for _, output := range outputs {
922+
assert.FileExists(t, filepath.Join(restoreDir1, "SPlsWork", output))
923+
}
924+
assert.FileExists(t, filepath.Join(restoreDir1, "test.ush"))
925+
926+
// Get timestamps of restored files
927+
dllPath := filepath.Join(restoreDir1, "SPlsWork", "test.dll")
928+
infoBeforeSecondRestore, err := os.Stat(dllPath)
929+
require.NoError(t, err)
930+
931+
// Wait a moment to ensure timestamps would differ if file was rewritten
932+
time.Sleep(10 * time.Millisecond)
933+
934+
// Second restoration (files already exist and are identical) - should skip copying
935+
err = cache.Restore(entry, restoreDir1)
936+
require.NoError(t, err)
937+
938+
// Verify file timestamp didn't change (file wasn't copied)
939+
infoAfterSecondRestore, err := os.Stat(dllPath)
940+
require.NoError(t, err)
941+
assert.Equal(t, infoBeforeSecondRestore.ModTime(), infoAfterSecondRestore.ModTime(),
942+
"File should not be copied when it's already identical")
943+
944+
// Now modify a file to make it different
945+
err = os.WriteFile(dllPath, []byte("corrupted content"), 0o644)
946+
require.NoError(t, err)
947+
948+
infoAfterModification, err := os.Stat(dllPath)
949+
require.NoError(t, err)
950+
951+
// Third restoration (file exists but differs) - should copy the modified file
952+
time.Sleep(10 * time.Millisecond)
953+
err = cache.Restore(entry, restoreDir1)
954+
require.NoError(t, err)
955+
956+
// Verify file was restored (timestamp changed and content correct)
957+
infoAfterThirdRestore, err := os.Stat(dllPath)
958+
require.NoError(t, err)
959+
assert.NotEqual(t, infoAfterModification.ModTime(), infoAfterThirdRestore.ModTime(),
960+
"File should be copied when it differs from cached version")
961+
962+
// Verify content was correctly restored
963+
content, err := os.ReadFile(dllPath)
964+
require.NoError(t, err)
965+
assert.Equal(t, "content of test.dll", string(content), "Content should be restored correctly")
966+
}

0 commit comments

Comments
 (0)