Skip to content

Commit 54d050c

Browse files
committed
First cut with basic set of features, well tested
0 parents  commit 54d050c

22 files changed

+960
-0
lines changed

.dockerignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/.idea/
2+
/.git/
3+
go-find-duplicates
4+
*.txt
5+
!default_exclusions.txt
6+
*.csv

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
/.idea/
2+
go-find-duplicates
3+
*.txt
4+
!default_exclusions.txt
5+
*.csv

Dockerfile

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Build stage: compile the binary with the Go toolchain image.
FROM golang:1.16.3-alpine3.13 AS builder

WORKDIR /opt/go-find-duplicates

# COPY is used instead of ADD because this is a plain copy of the build
# context; none of ADD's extra behaviors (URL fetch, archive extraction) apply.
COPY . ./

RUN go build

# Runtime stage: only the compiled binary is carried over from the builder.
FROM alpine:3.13

RUN apk --no-cache add bash

COPY --from=builder /opt/go-find-duplicates/go-find-duplicates /bin

README.md

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Go Find Duplicates
2+
3+
A blazingly-fast simple-to-use tool to find duplicate files (photos, videos, music, documents etc.) on your computer,
4+
portable hard drives etc.
5+
6+
## How to install and use?
7+
8+
There are two ways: directly, or through Docker.
9+
10+
### Direct
11+
12+
To install:
13+
14+
1. Install Go version at least **1.16**
15+
* On Ubuntu: `snap install go`
16+
* On Mac: `brew install go`
17+
* For any other OS: [Go downloads page](https://golang.org/dl/)
18+
2. Run command:
19+
```bash
20+
go get github.com/m-manu/go-find-duplicates
21+
```
22+
23+
To use:
24+
25+
```bash
26+
go-find-duplicates {dir-1} {dir-2} ... {dir-n}
27+
```
28+
29+
For more options and help, run:
30+
31+
```bash
32+
go-find-duplicates -help
33+
```
34+
35+
### Through Docker
36+
37+
```bash
38+
docker run --rm -v /Volumes/PortableHD:/mnt/PortableHD manumk/go-find-duplicates:latest go-find-duplicates -output=print /mnt/PortableHD
39+
```
40+
41+
In above command:
42+
43+
* option `--rm` removes the container when it exits
44+
* option `-v` mounts the host directory `/Volumes/PortableHD` as `/mnt/PortableHD` inside the container

bytesutil/human_readable_size.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Package bytesutil helps you convert byte sizes (such as file size, data uploaded/downloaded etc.) to
2+
// human-readable strings. This allows conversion to decimal and binary formats.
3+
//
4+
// See: https://en.m.wikipedia.org/wiki/Byte#Multiple-byte_units
5+
package bytesutil
6+
7+
import "fmt"
8+
9+
// Byte-size multipliers, all typed int64. The decimal units are successive
// powers of 1000; the binary units are successive powers of 1024.
const (
	KILO int64 = 1000        // 10^3
	KIBI int64 = 1 << 10     // 2^10
	MEGA       = 1000 * KILO // 10^6
	MEBI       = KIBI << 10  // 2^20
	GIGA       = 1000 * MEGA // 10^9
	GIBI       = MEBI << 10  // 2^30
	TERA       = 1000 * GIGA // 10^12
	TEBI       = GIBI << 10  // 2^40
	PETA       = 1000 * TERA // 10^15
	PEBI       = TEBI << 10  // 2^50
	EXA        = 1000 * PETA // 10^18
	EXBI       = PEBI << 10  // 2^60
)
24+
25+
// BinaryFormat formats a byte size to a human readable string in binary format.
26+
// Uses binary prefixes. See: https://en.m.wikipedia.org/wiki/Binary_prefix
27+
//
28+
// For example,
29+
// fmt.Println(bytesutil.BinaryFormat(2140))
30+
// prints
31+
// 2.09 KiB
32+
func BinaryFormat(size int64) string {
33+
if size < 0 {
34+
return ""
35+
} else if size < KIBI {
36+
return fmt.Sprintf("%d B", size)
37+
} else if size < MEBI {
38+
return fmt.Sprintf("%.2f KiB", float64(size)/float64(KIBI))
39+
} else if size < GIBI {
40+
return fmt.Sprintf("%.2f MiB", float64(size)/float64(MEBI))
41+
} else if size < TEBI {
42+
return fmt.Sprintf("%.2f GiB", float64(size)/float64(GIBI))
43+
} else if size < PEBI {
44+
return fmt.Sprintf("%.2f TiB", float64(size)/float64(TEBI))
45+
} else if size < EXBI {
46+
return fmt.Sprintf("%.2f PiB", float64(size)/float64(PEBI))
47+
} else {
48+
return fmt.Sprintf("%.2f EiB", float64(size)/float64(EXBI))
49+
}
50+
}
51+
52+
// DecimalFormat formats a byte size to a human readable string in decimal format.
53+
// Uses metric prefixes. See: https://en.m.wikipedia.org/wiki/Metric_prefix
54+
//
55+
// For example,
56+
// fmt.Println(bytesutil.DecimalFormat(2140))
57+
// prints
58+
// 2.14KB
59+
func DecimalFormat(size int64) string {
60+
if size < 0 {
61+
return ""
62+
} else if size < KILO {
63+
return fmt.Sprintf("%d B", size)
64+
} else if size < MEGA {
65+
return fmt.Sprintf("%.2f KB", float64(size)/float64(KILO))
66+
} else if size < GIGA {
67+
return fmt.Sprintf("%.2f MB", float64(size)/float64(MEGA))
68+
} else if size < TERA {
69+
return fmt.Sprintf("%.2f GB", float64(size)/float64(GIGA))
70+
} else if size < PETA {
71+
return fmt.Sprintf("%.2f TB", float64(size)/float64(TERA))
72+
} else if size < EXA {
73+
return fmt.Sprintf("%.2f PB", float64(size)/float64(PETA))
74+
} else {
75+
return fmt.Sprintf("%.2f EB", float64(size)/float64(EXA))
76+
}
77+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package bytesutil
2+
3+
import (
4+
"github.com/stretchr/testify/assert"
5+
"testing"
6+
)
7+
8+
func TestFormats(t *testing.T) {
9+
tests := map[int64][2]string{
10+
-1: {"", ""},
11+
0: {"0 B", "0 B"},
12+
1_023: {"1023 B", "1.02 KB"},
13+
2_140: {"2.09 KiB", "2.14 KB"},
14+
2_828_382: {"2.70 MiB", "2.83 MB"},
15+
2_341_234_123_412_341_234: {"2.03 EiB", "2.34 EB"},
16+
}
17+
for value, expectedValues := range tests {
18+
assert.Equal(t, expectedValues[0], BinaryFormat(value))
19+
assert.Equal(t, expectedValues[1], DecimalFormat(value))
20+
}
21+
}

default_exclusions.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
.DS_Store
2+
System Volume Information
3+
$RECYCLE.BIN
4+
desktop.ini
5+
Thumbs.db
6+
.picasaoriginals
7+
.picasa.ini
8+
.Trashes
9+
.TemporaryItems
10+
.Spotlight-V100
11+
.fseventsd
12+
_PAlbTN

entity/digest_to_files.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package entity
2+
3+
import (
4+
"sync"
5+
)
6+
7+
// DigestToFiles is a multi-map: each FileDigest key maps to a slice of string values.
8+
// Set takes the mutex while it appends, so concurrent Set calls are safe with each other.
9+
type DigestToFiles struct {
10+
mx *sync.Mutex // locked by Set around its update of data
11+
data map[FileDigest][]string // values per digest, in the order they were appended
12+
}
13+
14+
// NewDigestToFiles creates new DigestToFiles
15+
func NewDigestToFiles(size int) (m *DigestToFiles) {
16+
return &DigestToFiles{
17+
data: make(map[FileDigest][]string, size),
18+
mx: &sync.Mutex{},
19+
}
20+
}
21+
22+
// Set sets a value for the key
23+
func (m *DigestToFiles) Set(key FileDigest, value string) {
24+
m.mx.Lock()
25+
m.data[key] = append(m.data[key], value)
26+
m.mx.Unlock()
27+
}
28+
29+
// Remove removes entry in the map
30+
func (m *DigestToFiles) Remove(fd FileDigest) {
31+
delete(m.data, fd)
32+
}
33+
34+
// Map returns the internal map itself (not a copy) so callers can iterate over it.
// Changes made through the returned map are therefore visible to this DigestToFiles.
// No lock is taken here.
35+
func (m *DigestToFiles) Map() map[FileDigest][]string {
36+
return m.data
37+
}
38+
39+
// Size returns the number of distinct FileDigest keys in the map
// (not the total number of stored string values). No lock is taken here.
40+
func (m *DigestToFiles) Size() int {
41+
return len(m.data)
42+
}

entity/file_digest.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package entity
2+
3+
import (
4+
"fmt"
5+
"github.com/m-manu/go-find-duplicates/bytesutil"
6+
)
7+
8+
// FileDigest contains properties of a file that makes the file unique to a very high degree of confidence
9+
type FileDigest struct {
10+
FileExtension string
11+
FileSize int64
12+
FileFuzzyHash string
13+
}
14+
15+
func (f FileDigest) String() string {
16+
return fmt.Sprintf("%v/%v/%v", f.FileExtension, f.FileFuzzyHash, bytesutil.BinaryFormat(f.FileSize))
17+
}

entity/file_ext_and_size.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package entity
2+
3+
import "fmt"
4+
5+
// FileExtAndSize pairs a file extension with a file size. It is a comparable
// struct and serves as the key type of FileExtAndSizeToFiles.
type FileExtAndSize struct {
	FileExtension string
	FileSize      int64 // presumably in bytes; confirm against the code that fills it
}

// String renders the pair as "<extension>/<size>", with the size printed as a
// plain integer.
func (f FileExtAndSize) String() string {
	ext, size := f.FileExtension, f.FileSize
	return fmt.Sprintf("%v/%v", ext, size)
}

// FileExtAndSizeToFiles is a multi-map from a FileExtAndSize key to a slice of
// string values.
type FileExtAndSizeToFiles map[FileExtAndSize][]string

0 commit comments

Comments
 (0)