You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Optimize MySQL Bootstrap Chunking by Switching from MD5 to CRC32 (#987)
The existing MD5-based partitioning strategy caused chunking and bootstrapping of large tables to be extremely slow and prone to connection timeouts. The partitioning strategy has been updated to use CRC32 instead of MD5 for MySQL bootstrapping.
Copy file name to clipboard. Expand all lines: datastream-common/src/main/java/com/linkedin/datastream/common/databases/dbreader/MySqlChunkedQueryManager.java
+6 -8 Lines changed: 6 additions & 8 deletions
Original file line number
Diff line number
Diff line change
@@ -19,7 +19,7 @@ public class MySqlChunkedQueryManager implements ChunkedQueryManager {
19
19
private static final String SELECT_FROM = "SELECT * FROM ( ";
20
20
21
21
/** Generate base predicate for sharding keys to given number of partitions.
22
-
* Ex: MOD ( CONV ( MD5 ( CONCAT ( K1, K2, K3 ) ) , 16, 10 ) , 10 ) for a table with 3 keys {K1, K2, K3} and 10 partitions */
22
+
* Ex: MOD ( CRC32 ( CONCAT ( K1, K2, K3 ) ), 10 ) for a table with 3 keys {K1, K2, K3} and 10 partitions */
Copy file name to clipboard. Expand all lines: datastream-common/src/test/java/com/linkedin/datastream/common/databases/dbreader/TestMysqlChunkedQueryManager.java
+16 -16 Lines changed: 16 additions & 16 deletions
Original file line number
Diff line number
Diff line change
@@ -27,13 +27,13 @@ public void testSimpleKeySinglePartition() {
27
27
* (
28
28
* SELECT * FROM TABLE
29
29
* ) nestedTab1
30
-
* WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 ) ) , 16, 10 ) , 10 ) IN ( 3 ) )
30
+
* WHERE ( MOD ( CRC32 ( CONCAT ( KEY1 ) ) , 10 ) IN ( 3 ) )
31
31
* ORDER BY KEY1
32
32
* ) as nestedTab2 LIMIT 10;
33
33
*/
34
34
String firstExpected =
35
35
"SELECT * FROM ( SELECT * FROM ( SELECT * FROM TABLE ) nestedTab1 "
36
-
+ "WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 ) ) , 16, 10 ) , 10 ) IN ( 3 ) ) ORDER BY KEY1 ) as nestedTab2 LIMIT 10";
36
+
+ "WHERE ( MOD ( CRC32 ( CONCAT ( KEY1 ) ) , 10 ) IN ( 3 ) ) ORDER BY KEY1 ) as nestedTab2 LIMIT 10";
37
37
38
38
/**
39
39
* SELECT * FROM
@@ -42,12 +42,12 @@ public void testSimpleKeySinglePartition() {
42
42
* (
43
43
* SELECT * FROM TABLE
44
44
* ) nestedTab1
45
-
* WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 ) ) , 16, 10 ) , 10 ) IN ( 3 ) ) AND ( ( KEY1 > ? ) )
45
+
* WHERE ( MOD ( CRC32 ( CONCAT ( KEY1 ) ) , 10 ) IN ( 3 ) ) AND ( ( KEY1 > ? ) )
46
46
* ORDER BY KEY1
47
47
* ) as nestedTab2 LIMIT 10;
48
48
*/
49
49
String chunkedExpected = "SELECT * FROM ( SELECT * FROM ( SELECT * FROM TABLE ) nestedTab1 "
50
-
+ "WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 ) ) , 16, 10 ) , 10 ) IN ( 3 ) ) AND ( ( KEY1 > ? ) ) ORDER BY KEY1 ) as nestedTab2 LIMIT 10";
50
+
+ "WHERE ( MOD ( CRC32 ( CONCAT ( KEY1 ) ) , 10 ) IN ( 3 ) ) AND ( ( KEY1 > ? ) ) ORDER BY KEY1 ) as nestedTab2 LIMIT 10";
0 commit comments