Skip to content

Commit 6f01a03

Browse files
tvandijck, ljjgdfs, and Copilot
authored
Make it consistent with trendmicro latest implementation (#27)
* Make it consistent with the latest Trend Micro implementation: return an invalid hash when q3 is zero or the file size is less than 50 bytes.
* Adapt the hash interface to the latest fix.
* Use t.Logf and apply gofmt.
* Make sure we don't get CRLF line endings for these test files on Windows.
* Modifying the receiver (*t) in place and then returning its Binary() creates confusing side effects. Consider returning a zero hash directly without modifying the receiver state.

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

--------

Co-authored-by: ljjgdfs <ljjgdfs@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 75dea16 commit 6f01a03

File tree

7 files changed

+69
-10
lines changed

7 files changed

+69
-10
lines changed

hash.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ func (t *TLSH) Size() int {
3333

3434
func (t *TLSH) Sum(b []byte) []byte {
3535
q1, q2, q3 := quartilePoints(t.state.buckets)
36+
if q3 == 0 || t.state.fileSize < 50 {
37+
*t = TLSH{
38+
state: t.state,
39+
}
40+
// The receiver has just been reset to a TLSH carrying only its state; return an all-zero hash of codeSize bytes.
41+
return make([]byte, codeSize)
42+
}
3643
q1Ratio := byte(float32(q1)*100/float32(q3)) % 16
3744
q2Ratio := byte(float32(q2)*100/float32(q3)) % 16
3845
qRatio := ((q1Ratio & 0xF) << 4) | (q2Ratio & 0xF)

hash_test.go

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package tlsh
22

33
import (
4-
"fmt"
54
"hash"
65
"testing"
76
)
@@ -19,7 +18,7 @@ func TestHashinterface(t *testing.T) {
1918
t.Log(h.Size())
2019
hash := h.Sum(nil)
2120
t.Log(hash)
22-
t.Log(fmt.Sprintf("%x", hash[:]))
21+
t.Logf("%x", hash[:])
2322
}
2423

2524
func TestHashWrite(t *testing.T) {
@@ -28,16 +27,16 @@ func TestHashWrite(t *testing.T) {
2827
h1.Write([]byte("1234"))
2928
h1.Write([]byte("11"))
3029
h1.Write([]byte("1111111"))
31-
t.Log(fmt.Sprintf("h1: %x", h1.Sum(nil)))
30+
t.Logf("h1: %x", h1.Sum(nil))
3231
t.Logf("checksum h1: %d, %x", h1.state.checksum, h1.checksum)
3332

3433
// hash from read
3534
h2, err := HashBytes([]byte("1234111111111"))
36-
if err != nil {
37-
t.Error(err)
35+
if err == nil {
36+
t.Error("Missing error of less than 50 bytes")
3837
}
3938
t.Logf("checksum h2: %d, %x", h2.state.checksum, h2.checksum)
40-
t.Log(fmt.Sprintf("h2: %x", h2.Binary()))
39+
t.Logf("h2: %x", h2.Binary())
4140

4241
// compare hashes
4342
if h1.state.fileSize != h2.state.fileSize {
@@ -50,4 +49,28 @@ func TestHashWrite(t *testing.T) {
5049
if diff != 0 {
5150
t.Errorf("hashes differ by: %d", diff)
5251
}
52+
53+
h1.Write([]byte("1234567890"))
54+
h1.Write([]byte("1234567890"))
55+
h1.Write([]byte("1234567890"))
56+
h1.Write([]byte("1234567890"))
57+
t.Logf("h1: %x", h1.Sum(nil))
58+
t.Logf("checksum h1: %d, %x", h1.state.checksum, h1.checksum)
59+
s := "1234111111111" + "1234567890" + "1234567890" + "1234567890" + "1234567890"
60+
h3, err := HashBytes([]byte(s))
61+
if err != nil {
62+
t.Error(err)
63+
}
64+
t.Logf("checksum h3: %d, %x", h3.state.checksum, h3.checksum)
65+
t.Logf("h3: %x", h3.Binary())
66+
if h1.state.fileSize != h3.state.fileSize {
67+
t.Errorf("file size mismatch: %d != %d", h1.state.fileSize, h3.state.fileSize)
68+
}
69+
if h1.checksum != h3.checksum {
70+
t.Errorf("checksum mismatch: %x != %x", h1.checksum, h3.checksum)
71+
}
72+
diff = h1.Diff(h3)
73+
if diff != 0 {
74+
t.Errorf("hashes differ by: %d", diff)
75+
}
5376
}

tests/.gitattributes

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
* text eol=lf
2+
3+
*.jpg binary
4+
*.png binary
5+
*.exe binary
6+

tests/test_file_49bytes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
MIT License is so cool license that I can't imag

tests/test_file_q3zero

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1234560000000000000000000000000000000000000000000

tlsh.go

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"bufio"
1919
"bytes"
2020
"encoding/hex"
21+
"errors"
2122
"io"
2223
"math"
2324
"os"
@@ -336,8 +337,26 @@ func hashCalculate(r FuzzyReader) (*TLSH, error) {
336337
if err != nil {
337338
return &TLSH{}, err
338339
}
340+
if fileSize < 50 {
341+
return &TLSH{
342+
state: chunkState{
343+
buckets: buckets,
344+
fileSize: fileSize,
345+
checksum: checksum,
346+
},
347+
}, errors.New("less than 50 bytes")
348+
}
339349

340350
q1, q2, q3 := quartilePoints(buckets)
351+
if q3 == 0 {
352+
return &TLSH{
353+
state: chunkState{
354+
buckets: buckets,
355+
fileSize: fileSize,
356+
checksum: checksum,
357+
},
358+
}, errors.New("q3 is zero")
359+
}
341360
q1Ratio := byte(float32(q1)*100/float32(q3)) % 16
342361
q2Ratio := byte(float32(q2)*100/float32(q3)) % 16
343362
qRatio := ((q1Ratio & 0xF) << 4) | (q2Ratio & 0xF)
@@ -360,22 +379,22 @@ type FuzzyReader interface {
360379
io.ByteReader
361380
}
362381

363-
//HashReader calculates the TLSH for the input reader
382+
// HashReader calculates the TLSH for the input reader
364383
func HashReader(r FuzzyReader) (*TLSH, error) {
365384
t, err := hashCalculate(r)
366385
if err != nil {
367-
return &TLSH{}, err
386+
return &TLSH{state: t.state}, err
368387
}
369388
return t, err
370389
}
371390

372-
//HashBytes calculates the TLSH for the input byte slice
391+
// HashBytes calculates the TLSH for the input byte slice
373392
func HashBytes(blob []byte) (*TLSH, error) {
374393
r := bytes.NewReader(blob)
375394
return HashReader(r)
376395
}
377396

378-
//HashFilename calculates the TLSH for the input file
397+
// HashFilename calculates the TLSH for the input file
379398
func HashFilename(filename string) (*TLSH, error) {
380399
f, err := os.Open(filename)
381400
if err != nil {

tlsh_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ var (
2323
{"tests/test_file_9_tinyssl.exe", "67a3ad97f601c873e11a0af49d83d2d6bc7f7f709e522c9b74990b0e8d796822d1d48a"},
2424
{"tests/NON_EXISTENT", "0000000000000000000000000000000000000000000000000000000000000000000000"},
2525
{"tests/test_file_empty", "0000000000000000000000000000000000000000000000000000000000000000000000"},
26+
{"tests/test_file_q3zero", "0000000000000000000000000000000000000000000000000000000000000000000000"},
27+
{"tests/test_file_49bytes", "0000000000000000000000000000000000000000000000000000000000000000000000"},
2628
}
2729

2830
diffTestCases = []struct {

0 commit comments

Comments
 (0)