Skip to content

Commit b4f4b41

Browse files
authored
Thread-safe md5 (#128)
* use ThreadLocal for MD5 digest * unit test for thread-safe md5 * don't hash at runtime evaluation of operator * update method name * spotless fixes * not autoclosable until Java 9 * use numThreads variable for loop
1 parent f329648 commit b4f4b41

File tree

4 files changed

+90
-22
lines changed

4 files changed

+90
-22
lines changed

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ plugins {
66
}
77

88
group = 'cloud.eppo'
9-
version = '3.11.2-SNAPSHOT'
9+
version = '3.11.2'
1010
ext.isReleaseVersion = !version.endsWith("SNAPSHOT")
1111

1212
java {

src/main/java/cloud/eppo/Utils.java

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,20 @@
1515
public final class Utils {
1616
private static final SimpleDateFormat UTC_ISO_DATE_FORMAT = buildUtcIsoDateFormat();
1717
private static final Logger log = LoggerFactory.getLogger(Utils.class);
18-
private static final MessageDigest md = buildMd5MessageDigest();
19-
20-
private static MessageDigest buildMd5MessageDigest() {
21-
try {
22-
return MessageDigest.getInstance("MD5");
23-
} catch (NoSuchAlgorithmException e) {
24-
throw new RuntimeException("Error computing md5 hash", e);
25-
}
18+
private static final ThreadLocal<MessageDigest> md = buildMd5MessageDigest();
19+
20+
@SuppressWarnings("AnonymousHasLambdaAlternative")
21+
private static ThreadLocal<MessageDigest> buildMd5MessageDigest() {
22+
return new ThreadLocal<MessageDigest>() {
23+
@Override
24+
protected MessageDigest initialValue() {
25+
try {
26+
return MessageDigest.getInstance("MD5");
27+
} catch (NoSuchAlgorithmException e) {
28+
throw new RuntimeException("Error initializing MD5 hash", e);
29+
}
30+
}
31+
};
2632
}
2733

2834
public static void throwIfEmptyOrNull(String input, String errorMessage) {
@@ -37,8 +43,8 @@ public static void throwIfEmptyOrNull(String input, String errorMessage) {
3743
*/
3844
public static String getMD5Hex(String input) {
3945
// md5 the input
40-
md.reset();
41-
byte[] md5Bytes = md.digest(input.getBytes());
46+
md.get().reset();
47+
byte[] md5Bytes = md.get().digest(input.getBytes());
4248
// Pre-allocate a StringBuilder with a capacity of 32 characters
4349
StringBuilder hexString = new StringBuilder(32);
4450

@@ -58,8 +64,8 @@ public static String getMD5Hex(String input) {
5864
*/
5965
public static int getShard(String input, int maxShardValue) {
6066
// md5 the input
61-
md.reset();
62-
byte[] md5Bytes = md.digest(input.getBytes());
67+
md.get().reset();
68+
byte[] md5Bytes = md.get().digest(input.getBytes());
6369

6470
// Extract the first 4 bytes (8 digits) and convert to a long
6571
long value = 0;

src/main/java/cloud/eppo/ufc/dto/OperatorType.java

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
import static cloud.eppo.Utils.getMD5Hex;
44

5+
import java.util.HashMap;
6+
import java.util.Map;
7+
58
public enum OperatorType {
69
NOT_MATCHES("NOT_MATCHES"),
710
MATCHES("MATCHES"),
@@ -14,20 +17,40 @@ public enum OperatorType {
1417
IS_NULL("IS_NULL");
1518

1619
public final String value;
20+
private static final Map<String, OperatorType> valuesToOperatorType =
21+
buildValueToOperatorTypeMap();
22+
private static final Map<String, OperatorType> hashesToOperatorType =
23+
buildHashToOperatorTypeMap();
24+
25+
private static Map<String, OperatorType> buildValueToOperatorTypeMap() {
26+
Map<String, OperatorType> result = new HashMap<>();
27+
for (OperatorType type : OperatorType.values()) {
28+
result.put(type.value, type);
29+
}
30+
return result;
31+
}
32+
33+
private static Map<String, OperatorType> buildHashToOperatorTypeMap() {
34+
Map<String, OperatorType> result = new HashMap<>();
35+
for (OperatorType type : OperatorType.values()) {
36+
result.put(getMD5Hex(type.value), type);
37+
}
38+
return result;
39+
}
1740

1841
OperatorType(String value) {
1942
this.value = value;
2043
}
2144

2245
public static OperatorType fromString(String value) {
23-
for (OperatorType type : OperatorType.values()) {
24-
if (type.value.equals(value)
25-
|| getMD5Hex(type.value).equals(value)
26-
|| getMD5Hex(type.value).equals(value)) {
27-
return type;
28-
}
46+
// First we try obfuscated lookup as in client situations we'll care more about ingestion
47+
// performance
48+
OperatorType type = hashesToOperatorType.get(value);
49+
// Then we'll try non-obfuscated lookup
50+
if (type == null) {
51+
type = valuesToOperatorType.get(value);
2952
}
30-
return null;
53+
return type;
3154
}
3255

3356
public boolean isInequalityComparison() {

src/test/java/cloud/eppo/UtilsTest.java

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
package cloud.eppo;
22

33
import static cloud.eppo.Utils.*;
4-
import static org.junit.jupiter.api.Assertions.assertEquals;
5-
import static org.junit.jupiter.api.Assertions.assertNull;
4+
import static org.junit.jupiter.api.Assertions.*;
65

76
import com.fasterxml.jackson.core.JsonProcessingException;
87
import com.fasterxml.jackson.databind.JsonNode;
98
import com.fasterxml.jackson.databind.ObjectMapper;
109
import java.util.Date;
10+
import java.util.concurrent.ExecutorService;
11+
import java.util.concurrent.Executors;
12+
import java.util.concurrent.TimeUnit;
13+
import java.util.concurrent.atomic.AtomicBoolean;
1114
import org.junit.jupiter.api.Test;
1215

1316
public class UtilsTest {
@@ -25,6 +28,42 @@ public void testGetMd5Hash() {
2528
assertEquals("429fb7196ccb2978443a0de8da180e00", getMD5Hex("input-34"));
2629
}
2730

31+
@Test
32+
public void testGetMd5HashThreadSafe() {
33+
final AtomicBoolean interferenceEncountered = new AtomicBoolean(false);
34+
int numThreads = 2;
35+
ExecutorService pool = Executors.newFixedThreadPool(numThreads);
36+
try {
37+
for (int i = 0; i < numThreads; i += 1) {
38+
pool.execute(
39+
() -> {
40+
if (testForMd5Interference()) {
41+
interferenceEncountered.set(true);
42+
}
43+
});
44+
}
45+
pool.shutdown();
46+
} finally {
47+
try {
48+
assertTrue(pool.awaitTermination(5, TimeUnit.SECONDS));
49+
} catch (InterruptedException ex) {
50+
fail();
51+
}
52+
assertFalse(interferenceEncountered.get());
53+
}
54+
}
55+
56+
private boolean testForMd5Interference() {
57+
boolean interferenceEncountered = false;
58+
for (int i = 0; i < 100; i += 1) {
59+
if (!getMD5Hex("input-62").equals("448614887a99f16179b400cfccceb72d")) {
60+
interferenceEncountered = true;
61+
break;
62+
}
63+
}
64+
return interferenceEncountered;
65+
}
66+
2867
@Test
2968
public void testGetShard() {
3069
// Shard is the first 8 digits read as a number and modulo into the space

0 commit comments

Comments
 (0)