Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 52 additions & 12 deletions cmd/ls.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ import (
"github.com/spf13/cobra"

"github.com/substantialcattle5/sietch/internal/config"
"github.com/substantialcattle5/sietch/internal/deduplication"
"github.com/substantialcattle5/sietch/internal/fs"
lsui "github.com/substantialcattle5/sietch/internal/ls"
"github.com/substantialcattle5/sietch/util"
)

Expand Down Expand Up @@ -64,10 +66,17 @@ Examples:
long, _ := cmd.Flags().GetBool("long")
showTags, _ := cmd.Flags().GetBool("tags")
sortBy, _ := cmd.Flags().GetString("sort")
showDedup, _ := cmd.Flags().GetBool("dedup-stats")

// Filter and sort files
files := filterAndSortFiles(manifest.Files, filterPath, sortBy)

// Build chunk -> files index only if dedup stats requested
var chunkRefs map[string][]string
if showDedup {
chunkRefs = buildChunkIndex(manifest.Files)
}

// Display the files
if len(files) == 0 {
if filterPath != "" {
Expand All @@ -79,9 +88,9 @@ Examples:
}

if long {
displayLongFormat(files, showTags)
displayLongFormat(files, showTags, showDedup, chunkRefs)
} else {
displayShortFormat(files, showTags)
lsui.DisplayShortFormat(files, showTags, showDedup, chunkRefs)
}

return nil
Expand Down Expand Up @@ -125,7 +134,8 @@ func filterAndSortFiles(files []config.FileManifest, filterPath, sortBy string)
}

// Display files in long format with detailed information
func displayLongFormat(files []config.FileManifest, showTags bool) {
// showDedup = whether to include dedup stats; chunkRefs is map[chunkID][]filePaths
func displayLongFormat(files []config.FileManifest, showTags, showDedup bool, chunkRefs map[string][]string) {
// Create a tabwriter for aligned columns
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
defer w.Flush()
Expand Down Expand Up @@ -159,20 +169,47 @@ func displayLongFormat(files []config.FileManifest, showTags bool) {
len(file.Chunks),
file.Destination+file.FilePath)
}

// Dedup stats (print an indented stats line after the file line)
if showDedup && chunkRefs != nil {
sharedChunks, savedBytes, sharedWith := deduplication.ComputeDedupStatsForFile(file, chunkRefs)
// Format saved size
savedStr := util.HumanReadableSize(savedBytes)
// Format shared_with string with truncation
sharedWithStr := lsui.FormatSharedWith(sharedWith, 10)
// Print as indented info (not part of the tabwriter)
if len(sharedWith) == 0 {
fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", "", "", "", "") // ensure tabwriter alignment
fmt.Fprintf(w, " shared_chunks: %d\t saved: %s\n", sharedChunks, savedStr)
} else {
fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", "", "", "", "") // alignment spacer
fmt.Fprintf(w, " shared_chunks: %d\t saved: %s\t shared_with: %s\n", sharedChunks, savedStr, sharedWithStr)
}
}
}
}

// Display files in short format
func displayShortFormat(files []config.FileManifest, showTags bool) {
for _, file := range files {
path := file.Destination + file.FilePath
if showTags && len(file.Tags) > 0 {
tags := strings.Join(file.Tags, ", ")
fmt.Printf("%s [%s]\n", path, tags)
} else {
fmt.Println(path)
// buildChunkIndex walks every file in the manifest list and records, for each
// chunk identifier, the paths (Destination + FilePath) of the files whose
// chunk list mentions it.
//
// The identifier is the chunk's Hash, or its EncryptedHash when Hash is empty;
// chunks with neither are left out of the index. A path is appended once per
// chunk occurrence, in manifest order.
func buildChunkIndex(files []config.FileManifest) map[string][]string {
	index := make(map[string][]string)
	for i := range files {
		path := files[i].Destination + files[i].FilePath
		for _, chunk := range files[i].Chunks {
			id := chunk.Hash
			if id == "" {
				// No plaintext hash recorded: identify the chunk by its encrypted hash.
				id = chunk.EncryptedHash
			}
			if id != "" {
				index[id] = append(index[id], path)
			}
		}
	}
	return index
}

func init() {
Expand All @@ -182,4 +219,7 @@ func init() {
lsCmd.Flags().BoolP("long", "l", false, "Use long listing format")
lsCmd.Flags().BoolP("tags", "t", false, "Show file tags")
lsCmd.Flags().StringP("sort", "s", "path", "Sort by: name, size, time, path")

// New dedup-stats flag
lsCmd.Flags().BoolP("dedup-stats", "d", false, "Show per-file deduplication statistics")
}
216 changes: 216 additions & 0 deletions cmd/ls_helpers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
package cmd

import (
"bytes"
"fmt"
"io"
"os"
"sort"
"strings"
"testing"
"time"

"github.com/substantialcattle5/sietch/internal/config"
dedup "github.com/substantialcattle5/sietch/internal/deduplication"
lsui "github.com/substantialcattle5/sietch/internal/ls"
)

// captureStdout runs fn with os.Stdout redirected into a pipe and returns
// everything fn wrote to it.
//
// The pipe is drained by a separate goroutine while fn runs, so fn may write
// more than the OS pipe buffer holds without blocking forever. os.Stdout is
// restored and both pipe ends are closed even if fn panics or calls
// t.FailNow. The test fails via t.Fatalf if the pipe cannot be created or
// read.
func captureStdout(t *testing.T, fn func()) string {
	t.Helper()

	r, w, err := os.Pipe()
	if err != nil {
		t.Fatalf("pipe: %v", err)
	}
	defer r.Close()

	type capture struct {
		out string
		err error
	}
	// Buffered so the reader goroutine can always finish, even when fn
	// panics and nobody receives the result.
	done := make(chan capture, 1)
	go func() {
		var buf bytes.Buffer
		_, copyErr := io.Copy(&buf, r)
		done <- capture{out: buf.String(), err: copyErr}
	}()

	old := os.Stdout
	os.Stdout = w
	func() {
		defer func() {
			os.Stdout = old
			// Closing the write end delivers EOF to the reader goroutine.
			_ = w.Close()
		}()
		fn()
	}()

	res := <-done
	if res.err != nil {
		t.Fatalf("copy: %v", res.err)
	}
	return res.out
}

// TestFilterAndSortFiles_Basic exercises filterAndSortFiles on three manifest
// entries: ordering by name, ordering by size (largest first), and filtering
// by a destination prefix.
func TestFilterAndSortFiles_Basic(t *testing.T) {
	stamp := time.Now().UTC().Format(time.RFC3339)
	files := []config.FileManifest{
		{FilePath: "a.txt", Destination: "docs/", Size: 100, ModTime: stamp},
		{FilePath: "b.txt", Destination: "docs/", Size: 200, ModTime: stamp},
		{FilePath: "c.txt", Destination: "data/", Size: 50, ModTime: stamp},
	}

	// Name ordering: a.txt, b.txt, c.txt.
	byName := filterAndSortFiles(files, "", "name")
	nameOrderOK := byName[0].FilePath == "a.txt" &&
		byName[1].FilePath == "b.txt" &&
		byName[2].FilePath == "c.txt"
	if !nameOrderOK {
		t.Fatalf("unexpected order by name: %v", []string{byName[0].FilePath, byName[1].FilePath, byName[2].FilePath})
	}

	// Size ordering: sizes must be non-increasing.
	bySize := filterAndSortFiles(files, "", "size")
	if !(bySize[0].Size >= bySize[1].Size && bySize[1].Size >= bySize[2].Size) {
		t.Fatalf("unexpected order by size: %v", []int64{bySize[0].Size, bySize[1].Size, bySize[2].Size})
	}

	// Prefix filter: only the two docs/ entries survive.
	inDocs := filterAndSortFiles(files, "docs/", "path")
	if got := len(inDocs); got != 2 {
		t.Fatalf("expected 2 files in docs/, got %d", got)
	}
}

// TestBuildChunkIndexAndComputeDedupStats builds a chunk index over three
// files (two of which share chunk c1) and checks both the index contents and
// the per-file dedup statistics derived from it.
func TestBuildChunkIndexAndComputeDedupStats(t *testing.T) {
	stamp := time.Now().UTC().Format(time.RFC3339)

	files := []config.FileManifest{
		// test/a.txt owns c1 and c2.
		{
			FilePath:    "a.txt",
			Destination: "test/",
			Size:        1024,
			ModTime:     stamp,
			Chunks: []config.ChunkRef{
				{Hash: "c1", EncryptedSize: 128},
				{Hash: "c2", EncryptedSize: 256},
			},
		},
		// test/b.txt shares c1 with a.txt.
		{
			FilePath:    "b.txt",
			Destination: "test/",
			Size:        1024,
			ModTime:     stamp,
			Chunks: []config.ChunkRef{
				{Hash: "c1", EncryptedSize: 128},
			},
		},
		// other/c.txt shares nothing.
		{
			FilePath:    "c.txt",
			Destination: "other/",
			Size:        512,
			ModTime:     stamp,
			Chunks: []config.ChunkRef{
				{Hash: "c3", EncryptedSize: 64},
			},
		},
	}
	shared, lonely := files[0], files[2]

	idx := buildChunkIndex(files)

	// Index contents.
	if n := len(idx["c1"]); n != 2 {
		t.Fatalf("expected c1 refs length 2, got %d", n)
	}
	if n := len(idx["c2"]); n != 1 {
		t.Fatalf("expected c2 refs length 1, got %d", n)
	}

	// Stats for the file that shares c1.
	sharedChunks, savedBytes, sharedWith := dedup.ComputeDedupStatsForFile(shared, idx)
	switch {
	case sharedChunks != 1:
		t.Fatalf("expected sharedChunks 1 for f1, got %d", sharedChunks)
	case savedBytes != 128:
		t.Fatalf("expected savedBytes 128 for f1, got %d", savedBytes)
	case len(sharedWith) != 1:
		t.Fatalf("expected sharedWith length 1, got %d", len(sharedWith))
	case sharedWith[0] != "test/b.txt":
		t.Fatalf("expected shared with test/b.txt got %v", sharedWith)
	}

	// Stats for the file that shares nothing.
	sc, sb, sw := dedup.ComputeDedupStatsForFile(lonely, idx)
	if !(sc == 0 && sb == 0 && len(sw) == 0) {
		t.Fatalf("expected no shared chunks for f3, got sc=%d sb=%d sw=%v", sc, sb, sw)
	}
}

// TestFormatSharedWith_Truncation passes twelve names with a limit of ten and
// expects the formatted string to mention the two omitted entries.
func TestFormatSharedWith_Truncation(t *testing.T) {
	names := make([]string, 12)
	for i := range names {
		// Numeric suffix keeps the names distinct: file0 .. file11.
		names[i] = fmt.Sprintf("file%d", i)
	}

	formatted := lsui.FormatSharedWith(names, 10)
	if strings.Contains(formatted, "(+2 more)") {
		return
	}
	t.Fatalf("expected truncation info (+2 more) in '%s'", formatted)
}

// TestDisplayShortAndLongFormat_OutputContainsStats renders two files that
// share chunk c1 in both listing formats with dedup stats enabled, and checks
// that the captured stdout carries the expected markers.
func TestDisplayShortAndLongFormat_OutputContainsStats(t *testing.T) {
	stamp := time.Now().UTC().Format(time.RFC3339)

	files := []config.FileManifest{
		{
			FilePath:    "a.txt",
			Destination: "test/",
			Size:        100,
			ModTime:     stamp,
			Chunks:      []config.ChunkRef{{Hash: "c1", EncryptedSize: 128}},
		},
		{
			FilePath:    "b.txt",
			Destination: "test/",
			Size:        200,
			ModTime:     stamp,
			Chunks:      []config.ChunkRef{{Hash: "c1", EncryptedSize: 128}},
		},
	}
	chunkRefs := buildChunkIndex(files)

	// Short listing (tags and dedup stats on).
	renderShort := func() { lsui.DisplayShortFormat(files, true, true, chunkRefs) }
	outShort := captureStdout(t, renderShort)
	if !(strings.Contains(outShort, "shared_chunks:") && strings.Contains(outShort, "saved:")) {
		t.Fatalf("short output missing dedup info: %s", outShort)
	}

	// Long listing (tags off, dedup stats on).
	renderLong := func() { displayLongFormat(files, false, true, chunkRefs) }
	outLong := captureStdout(t, renderLong)
	if !(strings.Contains(outLong, "SIZE") && strings.Contains(outLong, "shared_chunks:")) {
		t.Fatalf("long output missing dedup info: %s", outLong)
	}
}

// TestBuildChunkIndex_DeterministicOrder verifies that the sharedWith list
// returned by ComputeDedupStatsForFile is sorted, independent of the order in
// which files appear in the manifest.
//
// Several peers are listed in non-sorted order: with a single peer the
// sortedness check would pass trivially and prove nothing.
func TestBuildChunkIndex_DeterministicOrder(t *testing.T) {
	now := time.Now().UTC().Format(time.RFC3339)

	// newFile builds a manifest entry whose only chunk is the shared chunk c1.
	newFile := func(dest, name string) config.FileManifest {
		return config.FileManifest{
			FilePath:    name,
			Destination: dest,
			Size:        10,
			ModTime:     now,
			Chunks:      []config.ChunkRef{{Hash: "c1", EncryptedSize: 10}},
		}
	}

	files := []config.FileManifest{
		newFile("x/", "a.txt"), // the file under test
		newFile("z/", "e.txt"),
		newFile("y/", "b.txt"),
		newFile("m/", "d.txt"),
		newFile("a/", "c.txt"),
	}
	idx := buildChunkIndex(files)

	// Every file references c1, so the index must list all of them.
	if len(idx["c1"]) != len(files) {
		t.Fatalf("expected %d refs for c1; got %d", len(files), len(idx["c1"]))
	}

	_, _, sw := dedup.ComputeDedupStatsForFile(files[0], idx)

	// All peers except the file itself, in sorted order.
	want := []string{"a/c.txt", "m/d.txt", "y/b.txt", "z/e.txt"}
	if len(sw) != len(want) {
		t.Fatalf("expected %d sharedWith entries, got %d: %v", len(want), len(sw), sw)
	}
	if !sort.StringsAreSorted(sw) {
		t.Fatalf("sharedWith not sorted: %v", sw)
	}
	for i := range want {
		if sw[i] != want[i] {
			t.Fatalf("sharedWith[%d] = %q, want %q (full list: %v)", i, sw[i], want[i], sw)
		}
	}
}
61 changes: 61 additions & 0 deletions internal/deduplication/util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package deduplication

import (
"sort"

"github.com/substantialcattle5/sietch/internal/config"
)

// ComputeDedupStatsForFile derives per-file deduplication statistics from a
// chunk index (chunk identifier -> paths of the files referencing it).
//
// A chunk of file counts as shared when the index lists more than one
// reference for it. For every such chunk, sharedChunks is incremented and
// savedBytes grows by the chunk's EncryptedSize, or its Size when
// EncryptedSize is not positive, or a 4 MiB default when neither is positive.
// sharedWith holds the distinct referencing paths other than the file's own
// path (Destination + FilePath), sorted for deterministic output; it is empty
// but non-nil when nothing is shared.
//
// Chunks are identified by Hash, falling back to EncryptedHash; chunks with
// neither are ignored.
func ComputeDedupStatsForFile(file config.FileManifest, chunkRefs map[string][]string) (sharedChunks int, savedBytes int64, sharedWith []string) {
	// Size assumed for a chunk that records no size at all: 4 MiB.
	const fallbackChunkSize int64 = 4 << 20

	self := file.Destination + file.FilePath
	peers := make(map[string]struct{})

	for _, chunk := range file.Chunks {
		id := chunk.Hash
		if id == "" {
			id = chunk.EncryptedHash
		}
		if id == "" {
			continue
		}

		// A missing key yields a nil slice, which also fails the "shared" test.
		owners := chunkRefs[id]
		if len(owners) <= 1 {
			continue
		}
		sharedChunks++

		// Stored (encrypted) size is preferred over the plaintext size.
		switch {
		case chunk.EncryptedSize > 0:
			savedBytes += chunk.EncryptedSize
		case chunk.Size > 0:
			savedBytes += chunk.Size
		default:
			savedBytes += fallbackChunkSize
		}

		for _, owner := range owners {
			if owner != self {
				peers[owner] = struct{}{}
			}
		}
	}

	sharedWith = make([]string, 0, len(peers))
	for peer := range peers {
		sharedWith = append(sharedWith, peer)
	}
	// Map iteration order is random; sort so callers see stable output.
	sort.Strings(sharedWith)
	return sharedChunks, savedBytes, sharedWith
}
Loading
Loading