Skip to content

Commit 75dea16

Browse files
authored
Implementation of the hash interface for TLSH (#25)
1 parent dac97c2 commit 75dea16

File tree

6 files changed

+232
-60
lines changed

6 files changed

+232
-60
lines changed

Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,4 +51,16 @@ benchcmp:
5151
git stash save "stashing for benchcmp"
5252
@go test -test.benchmem=true -run=NONE -bench=. ./... > bench_head.test
5353
git stash pop
54-
benchcmp bench_head.test bench_current.test
54+
benchstat bench_head.test bench_current.test
55+
56+
.PHONY: benchbcmp
57+
benchbcmp:
58+
# ensure no governor weirdness
59+
# sudo cpufreq-set -g performance
60+
go test -test.benchmem=true -run=NONE -bench=. ./... > bench_current.test
61+
git stash save "stashing for benchcmp"
62+
git checkout main
63+
@go test -test.benchmem=true -run=NONE -bench=. ./... > bench_main.test
64+
git checkout -
65+
git stash pop
66+
benchstat bench_main.test bench_current.test

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
![Workflow](https://github.com/glaslos/ssdeep/actions/workflows/go.yml/badge.svg)
22
[![Go Report Card](https://goreportcard.com/badge/github.com/glaslos/tlsh)](https://goreportcard.com/report/github.com/glaslos/tlsh)
3-
[![Go Reference](https://pkg.go.dev/badge/badge/glaslos/tlsh.svg)](https://pkg.go.dev/badge/glaslos/tlsh)
3+
[![Go Reference](https://pkg.go.dev/badge/github.com/glaslos/tlsh.svg)](https://pkg.go.dev/github.com/glaslos/tlsh)
44

55
# TLSH
66
Trend Micro Locality Sensitive Hash lib in Golang

distance.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ func digestDistance(x [codeSize]byte, y [codeSize]byte) (diff int) {
2525
}
2626

2727
// diffTotal calculates the diff between two TLSH hashes for the hash header and body.
28-
func diffTotal(a, b *Tlsh, lenDiff bool) (diff int) {
28+
func diffTotal(a, b *TLSH, lenDiff bool) int {
29+
diff := 0
2930
if lenDiff {
3031
lDiff := modDiff(a.lValue, b.lValue, 256)
3132

hash.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package tlsh
2+
3+
import (
4+
"hash"
5+
)
6+
7+
// Compile-time assertion that *TLSH satisfies the hash.Hash interface.
var _ hash.Hash = (*TLSH)(nil)
8+
9+
func (t *TLSH) Reset() {
10+
t.checksum = byte(0)
11+
t.lValue = 0
12+
t.q1Ratio = 0
13+
t.q2Ratio = 0
14+
t.qRatio = 0
15+
t.code = [codeSize]byte{}
16+
t.state = chunkState{
17+
buckets: [numBuckets]uint{},
18+
chunk: [windowLength]byte{},
19+
chunkSlice: []byte{},
20+
fileSize: 0,
21+
checksum: byte(0),
22+
chunk3: &[3]byte{},
23+
}
24+
}
25+
26+
func (t *TLSH) BlockSize() int {
27+
return 1
28+
}
29+
30+
func (t *TLSH) Size() int {
31+
return len(t.Binary())
32+
}
33+
34+
func (t *TLSH) Sum(b []byte) []byte {
35+
q1, q2, q3 := quartilePoints(t.state.buckets)
36+
q1Ratio := byte(float32(q1)*100/float32(q3)) % 16
37+
q2Ratio := byte(float32(q2)*100/float32(q3)) % 16
38+
qRatio := ((q1Ratio & 0xF) << 4) | (q2Ratio & 0xF)
39+
40+
biHash := bucketsBinaryRepresentation(t.state.buckets, q1, q2, q3)
41+
42+
*t = *new(t.state.checksum, lValue(t.state.fileSize), q1Ratio, q2Ratio, qRatio, biHash, t.state)
43+
return t.Binary()
44+
}
45+
46+
func (t *TLSH) Write(p []byte) (int, error) {
47+
t.state.fileSize += len(p)
48+
if len(t.state.chunkSlice) < windowLength {
49+
missing := windowLength - len(t.state.chunkSlice)
50+
switch {
51+
case len(p) < missing:
52+
t.state.chunkSlice = append(t.state.chunkSlice, p...)
53+
return len(p), nil
54+
default:
55+
t.state.chunkSlice = append(t.state.chunkSlice, p[0:missing]...)
56+
p = p[missing:]
57+
copy(t.state.chunk[:], t.state.chunkSlice[0:5])
58+
t.state.chunk = reverse(t.state.chunk)
59+
t.state.process()
60+
}
61+
}
62+
63+
for _, b := range p {
64+
t.state.chunk[0] = b
65+
t.state.process()
66+
}
67+
68+
return len(p), nil
69+
}

hash_test.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
package tlsh
2+
3+
import (
4+
"fmt"
5+
"hash"
6+
"testing"
7+
)
8+
9+
func TestHashinterface(t *testing.T) {
10+
h := New()
11+
var _ hash.Hash = h
12+
t.Log(h.BlockSize())
13+
h.Reset()
14+
n, err := h.Write([]byte("hello"))
15+
if err != nil {
16+
t.Error(err)
17+
}
18+
t.Log(n)
19+
t.Log(h.Size())
20+
hash := h.Sum(nil)
21+
t.Log(hash)
22+
t.Log(fmt.Sprintf("%x", hash[:]))
23+
}
24+
25+
func TestHashWrite(t *testing.T) {
26+
// hash using the hash.Hash interface methods
27+
h1 := New()
28+
h1.Write([]byte("1234"))
29+
h1.Write([]byte("11"))
30+
h1.Write([]byte("1111111"))
31+
t.Log(fmt.Sprintf("h1: %x", h1.Sum(nil)))
32+
t.Logf("checksum h1: %d, %x", h1.state.checksum, h1.checksum)
33+
34+
// hash from read
35+
h2, err := HashBytes([]byte("1234111111111"))
36+
if err != nil {
37+
t.Error(err)
38+
}
39+
t.Logf("checksum h2: %d, %x", h2.state.checksum, h2.checksum)
40+
t.Log(fmt.Sprintf("h2: %x", h2.Binary()))
41+
42+
// compare hashes
43+
if h1.state.fileSize != h2.state.fileSize {
44+
t.Errorf("file size mismatch: %d != %d", h1.state.fileSize, h2.state.fileSize)
45+
}
46+
if h1.checksum != h2.checksum {
47+
t.Errorf("checksum mismatch: %x != %x", h1.checksum, h2.checksum)
48+
}
49+
diff := h1.Diff(h2)
50+
if diff != 0 {
51+
t.Errorf("hashes differ by: %d", diff)
52+
}
53+
}

0 commit comments

Comments
 (0)