Skip to content

Commit 0512dcd

Browse files
[CYB-212]TLSH dependency switch due to Maven central unavailability (#86)
* Adding tlsh implementation. * Update unit tests. Co-authored-by: Stas Panasiuk <[email protected]>
1 parent 6049872 commit 0512dcd

File tree

14 files changed

+1529
-576
lines changed

14 files changed

+1529
-576
lines changed

flink-cyber/flink-stellar/pom.xml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,6 @@
7777
<groupId>org.adrianwalker</groupId>
7878
<artifactId>multiline-string</artifactId>
7979
</dependency>
80-
<dependency>
81-
<groupId>com.trendmicro</groupId>
82-
<artifactId>tlsh</artifactId>
83-
<version>3.7.1</version>
84-
</dependency>
8580
<dependency>
8681
<groupId>org.apache.commons</groupId>
8782
<artifactId>commons-math3</artifactId>
@@ -252,6 +247,12 @@
252247
<version>${global_hamcrest_version}</version>
253248
<scope>test</scope>
254249
</dependency>
250+
<dependency>
251+
<groupId>org.junit.jupiter</groupId>
252+
<artifactId>junit-jupiter</artifactId>
253+
<version>${jupiter.junit.version}</version>
254+
<scope>test</scope>
255+
</dependency>
255256
<dependency>
256257
<groupId>org.junit.jupiter</groupId>
257258
<artifactId>junit-jupiter-api</artifactId>
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package org.apache.metron.stellar.common.utils.hashing.tlsh;
2+
3+
/**
 * Lookup table giving a difference score for every pair of byte values.
 *
 * <p>Each byte is read as four 2-bit fields. For every field position the absolute
 * difference of the two field values is taken; a difference of 3 is counted as 6,
 * any other difference is counted as-is. The score of a byte pair is the sum over
 * the four positions, so scores range from 0 to 24.
 *
 * <p>The table contents are the same for every instance and are never modified, so
 * the table is computed once when the class is loaded and shared by all instances.
 */
class BitPairsTable {

    /** Number of distinct byte values, i.e. the size of each table dimension. */
    private static final int BIT_PAIRS_DIFF_TABLE_SIZE = 256;

    /** Number of bits in a byte; bounds the 2-bit field scan. */
    private static final int BITS_PER_BYTE = 8;

    /** Shared table, built once. Never written to after initialisation. */
    private static final int[][] DEFAULT_TABLE = generateDefaultBitPairsDiffTable();

    private final int[][] table;

    BitPairsTable() {
        this.table = DEFAULT_TABLE;
    }

    /**
     * Builds the full 256 x 256 score table.
     *
     * @return a new table where {@code result[i][j]} is the score of bytes {@code i} and {@code j}
     */
    private static int[][] generateDefaultBitPairsDiffTable() {
        final int[][] result = new int[BIT_PAIRS_DIFF_TABLE_SIZE][BIT_PAIRS_DIFF_TABLE_SIZE];

        for (int i = 0; i < BIT_PAIRS_DIFF_TABLE_SIZE; i++) {
            for (int j = 0; j < BIT_PAIRS_DIFF_TABLE_SIZE; j++) {
                result[i][j] = scoreOf(i, j);
            }
        }

        return result;
    }

    /**
     * Scores one pair of byte values by comparing their four 2-bit fields.
     *
     * @param first  a value in {@code [0, 255]}
     * @param second a value in {@code [0, 255]}
     * @return the summed per-field difference, with a field difference of 3 weighted as 6
     */
    private static int scoreOf(final int first, final int second) {
        int score = 0;
        for (int shift = 0; shift < BITS_PER_BYTE; shift += 2) {
            // Both operands are non-negative, so masking with 3 is the same as "% 4".
            final int d = Math.abs(((first >> shift) & 3) - ((second >> shift) & 3));
            // The largest possible field difference is penalised double.
            score += (d == 3) ? d * 2 : d;
        }
        return score;
    }

    /**
     * Returns the precomputed score for a pair of byte values.
     *
     * @param row    first byte value, expected in {@code [0, 255]}
     * @param column second byte value, expected in {@code [0, 255]}
     * @return the score stored for that pair
     * @throws ArrayIndexOutOfBoundsException if either argument is outside {@code [0, 255]}
     */
    public int getValue(int row, int column) {
        return table[row][column];
    }
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
package org.apache.metron.stellar.common.utils.hashing.tlsh;
2+
3+
4+
import java.util.function.IntUnaryOperator;
5+
import java.util.stream.IntStream;
6+
7+
/**
 * Fixed-size circular buffer holding the most recently written bytes.
 *
 * <p>Bytes are written with {@link #put(byte)}; once more than
 * {@link #getWindowSize()} bytes have been written the oldest ones are overwritten.
 * {@link #getWindow()} returns the contents newest-first as unsigned values.
 *
 * <p>This class is not synchronised.
 */
public class SlidingWindow {
    public static final int DEFAULT_SIZE = 5;

    private final byte[] window;

    /** Index of the slot the next {@link #put(byte)} will write to; always in {@code [0, window.length)}. */
    private int nextSlot = 0;

    /** Total number of bytes written so far. */
    private int byteCount = 0;

    SlidingWindow() {
        this.window = new byte[DEFAULT_SIZE];
    }

    /**
     * Appends a byte, overwriting the oldest one when the window is full.
     *
     * @param value the byte to store
     */
    public void put(final byte value) {
        window[nextSlot] = value;
        // The cursor is tracked separately from byteCount so that it stays a valid,
        // continuous index even if the counter ever wraps around.
        nextSlot = (nextSlot + 1) % window.length;
        byteCount++;
    }

    /**
     * Returns the window contents ordered from the most recently written byte to the
     * oldest, each converted to an unsigned value in {@code [0, 255]}.
     *
     * <p>Slots that have not been written yet read as {@code 0}; in particular an
     * empty window yields an array of zeros instead of failing.
     *
     * @return a new array of length {@link #getWindowSize()}
     */
    public int[] getWindow() {
        final int size = window.length;
        final int[] snapshot = new int[size];

        // Start at the slot written last and walk backwards, wrapping at index 0.
        int cursor = (nextSlot == 0) ? size - 1 : nextSlot - 1;
        for (int i = 0; i < size; i++) {
            snapshot[i] = window[cursor] & 0xFF;
            cursor = (cursor == 0) ? size - 1 : cursor - 1;
        }
        return snapshot;
    }

    /**
     * @return the number of bytes written through {@link #put(byte)}
     */
    public int getByteCount() {
        return byteCount;
    }

    /**
     * @return the capacity of the window in bytes
     */
    public int getWindowSize() {
        return window.length;
    }
}

flink-cyber/flink-stellar/src/main/java/org/apache/metron/stellar/common/utils/hashing/tlsh/TLSH.java

Lines changed: 70 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,42 +17,87 @@
1717
*/
1818
package org.apache.metron.stellar.common.utils.hashing.tlsh;
1919

20-
import com.trendmicro.tlsh.BucketOption;
21-
import com.trendmicro.tlsh.ChecksumOption;
22-
import com.trendmicro.tlsh.Tlsh;
23-
import com.trendmicro.tlsh.TlshCreator;
20+
import java.nio.ByteBuffer;
2421

25-
import java.util.Optional;
22+
import static org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHUtil.swapNibble;
2623

2724
/**
2825
* The abstraction around interacting with TLSH.
2926
*/
3027
public class TLSH {
31-
private TlshCreator creator;
32-
public TLSH(BucketOption bucketOption, ChecksumOption checksumOption) {
33-
creator = new TlshCreator(bucketOption, checksumOption);
34-
}
3528

36-
public String apply(byte[] data, boolean force) {
37-
try {
38-
creator.update(data);
39-
return creator.getHash(force).getEncoded();
40-
} finally {
41-
creator.reset();
29+
/**
30+
* The checksum bytes.
31+
*/
32+
private final int[] checksum;
33+
/**
34+
* The buckets bytes.
35+
*/
36+
private final int[] codes;
37+
/**
38+
* The encoded length value.
39+
*/
40+
private final int lValue;
41+
/**
42+
* The q1 ratio.
43+
*/
44+
private final int q1Ratio;
45+
/**
46+
* The q2 ratio.
47+
*/
48+
private final int q2Ratio;
49+
50+
51+
/**
 * Creates a hash value from its individual components.
 *
 * <p>The arrays are stored by reference; they are not copied.
 *
 * @param checksum the checksum values
 * @param codes    the bucket code values
 * @param lValue   the encoded length value
 * @param q1       the q1 ratio
 * @param q2       the q2 ratio
 */
public TLSH(int[] checksum, int[] codes, int lValue, int q1, int q2) {
    this.q1Ratio = q1;
    this.q2Ratio = q2;
    this.lValue = lValue;
    this.codes = codes;
    this.checksum = checksum;
}
58+
59+
60+
public String getHash() {
61+
return TLSHUtil.bytesToHex(getHexBytes());
62+
}
63+
64+
public int[] getChecksum() {
65+
return checksum;
66+
}
67+
68+
public int[] getCodes() {
69+
return codes;
4270
}
43-
}
4471

45-
public static int distance(String hash1, String hash2, Optional<Boolean> includeLength) {
46-
if (hash1 == null || hash2 == null) {
47-
return -1;
72+
public int getlValue() {
73+
return lValue;
4874
}
4975

50-
if (hash1.equals(hash2)) {
51-
return 0;
76+
public int getQ1Ratio() {
77+
return q1Ratio;
5278
}
5379

54-
Tlsh t1 = Tlsh.fromTlshStr(hash1);
55-
Tlsh t2 = Tlsh.fromTlshStr(hash2);
56-
return t1.totalDiff(t2, includeLength.orElse(false));
57-
}
80+
public int getQ2Ratio() {
81+
return q2Ratio;
82+
}
83+
84+
public byte[] getHexBytes() {
85+
final ByteBuffer buf = ByteBuffer.allocate(checksum.length + 2 + codes.length);
86+
for (final int c : checksum) {
87+
buf.put((byte) swapNibble(c));
88+
}
89+
buf.put((byte) swapNibble(lValue));
90+
buf.put((byte) (q1Ratio << 4 | q2Ratio));
91+
for (int i = codes.length - 1; i >= 0; i--) {
92+
buf.put((byte) codes[i]);
93+
}
94+
buf.flip();
95+
if (buf.hasArray() && 0 == buf.arrayOffset()) {
96+
return buf.array();
97+
} else {
98+
final byte[] hash = new byte[buf.remaining()];
99+
buf.get(hash);
100+
return hash;
101+
}
102+
}
58103
}

0 commit comments

Comments
 (0)