Skip to content

Commit 715f439

Browse files
nitinbil and Ubuntu
authored
Optimize TPCH DDL for faster MySQL loading (#594)
This pull request includes changes to improve the performance of loading the TPCH database on a MySQL instance by deferring index creation to a post-load script. The most important changes include adding a new post-load script reference and removing index creation statements from the DDL script. Performance improvements: * [`config/mysql/sample_tpch_config.xml`](diffhunk://#diff-a48fec690069601e7837968f1848c04583a74352c5d98597f05e55794fd7cb00R17-R22): Added a reference to the new post-load script `postload-mysql.sql` to create indices after loading the database, which improves performance by nearly 30%. Codebase simplification: * [`src/main/resources/benchmarks/tpch/ddl-mysql.sql`](diffhunk://#diff-d0e0637fb2f0a9ee276e7046d3e142c830e9a652deb6a15afb2a1733e15d3caeL19): Removed all index creation statements from the DDL script to speed up the initial load process. [[1]](diffhunk://#diff-d0e0637fb2f0a9ee276e7046d3e142c830e9a652deb6a15afb2a1733e15d3caeL19) [[2]](diffhunk://#diff-d0e0637fb2f0a9ee276e7046d3e142c830e9a652deb6a15afb2a1733e15d3caeL29-L30) [[3]](diffhunk://#diff-d0e0637fb2f0a9ee276e7046d3e142c830e9a652deb6a15afb2a1733e15d3caeL44) [[4]](diffhunk://#diff-d0e0637fb2f0a9ee276e7046d3e142c830e9a652deb6a15afb2a1733e15d3caeL57-L58) [[5]](diffhunk://#diff-d0e0637fb2f0a9ee276e7046d3e142c830e9a652deb6a15afb2a1733e15d3caeL70-L73) [[6]](diffhunk://#diff-d0e0637fb2f0a9ee276e7046d3e142c830e9a652deb6a15afb2a1733e15d3caeL87-L88) [[7]](diffhunk://#diff-d0e0637fb2f0a9ee276e7046d3e142c830e9a652deb6a15afb2a1733e15d3caeL103-L105) [[8]](diffhunk://#diff-d0e0637fb2f0a9ee276e7046d3e142c830e9a652deb6a15afb2a1733e15d3caeL128-L135) * [`src/main/resources/benchmarks/tpch/postload-mysql.sql`](diffhunk://#diff-b8a6513014cff301d33e49c05085cbc3c32a97d5b1696d14739b0da57ed9f154R1-R24): Added the removed index creation statements to this new post-load script to be executed after the data load is complete. 
--------- Co-authored-by: Ubuntu <nitinver@nitin-test-client-16cpu2.z20r5dv4atzenijzzeni1czzpf.cx.internal.cloudapp.net>
1 parent 46fc66f commit 715f439

File tree

3 files changed

+48
-24
lines changed

3 files changed

+48
-24
lines changed

config/mysql/sample_tpch_config.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@
1414
<!-- Control scale factor to generate different amount of data -->
1515
<scalefactor>0.1</scalefactor>
1616

17+
<!--
18+
The post load script is crucial for creating TPCH indices after loading the database.
19+
Index creation was removed from the DDL script to speed up the load process.
20+
Creating indices post-load improves load performance by nearly 30%.
21+
See src/main/resources/benchmarks/tpch/postload-mysql.sql
22+
-->
23+
<afterload>/benchmarks/tpch/postload-mysql.sql</afterload>
24+
1725
<!-- The workload -->
1826
<terminals>1</terminals>
1927
<works>

src/main/resources/benchmarks/tpch/ddl-mysql.sql

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
/*
2+
For MySQL, TPCH indices are created post-load, which improves load
3+
performance. See src/main/resources/benchmarks/tpch/postload-mysql.sql
4+
(specified in <afterload> in config/mysql/sample_tpch_config.xml). When indices
5+
are created before the load, the insert operations increase overall
6+
load time by >30%. This happens because every insert needs to update
7+
all table indices, which results in additional binlog/redo log updates,
8+
index seeks, and more data IOPS (if data does not fit in memory).
9+
*/
10+
111
SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
212
SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
313

@@ -16,7 +26,6 @@ CREATE TABLE region (
1626
r_comment varchar(152),
1727
PRIMARY KEY (r_regionkey)
1828
);
19-
CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC);
2029

2130
CREATE TABLE nation (
2231
n_nationkey integer NOT NULL,
@@ -26,8 +35,6 @@ CREATE TABLE nation (
2635
PRIMARY KEY (n_nationkey),
2736
FOREIGN KEY (n_regionkey) REFERENCES region (r_regionkey) ON DELETE CASCADE
2837
);
29-
CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC);
30-
CREATE INDEX n_rk ON nation (n_regionkey ASC);
3138

3239
CREATE TABLE part (
3340
p_partkey integer NOT NULL,
@@ -41,7 +48,6 @@ CREATE TABLE part (
4148
p_comment varchar(23) NOT NULL,
4249
PRIMARY KEY (p_partkey)
4350
);
44-
CREATE UNIQUE INDEX p_pk ON part (p_partkey ASC);
4551

4652
CREATE TABLE supplier (
4753
s_suppkey integer NOT NULL,
@@ -54,8 +60,6 @@ CREATE TABLE supplier (
5460
PRIMARY KEY (s_suppkey),
5561
FOREIGN KEY (s_nationkey) REFERENCES nation (n_nationkey) ON DELETE CASCADE
5662
);
57-
CREATE UNIQUE INDEX s_sk ON supplier (s_suppkey ASC);
58-
CREATE INDEX s_nk ON supplier (s_nationkey ASC);
5963

6064
CREATE TABLE partsupp (
6165
ps_partkey integer NOT NULL,
@@ -67,10 +71,6 @@ CREATE TABLE partsupp (
6771
FOREIGN KEY (ps_partkey) REFERENCES part (p_partkey) ON DELETE CASCADE,
6872
FOREIGN KEY (ps_suppkey) REFERENCES supplier (s_suppkey) ON DELETE CASCADE
6973
);
70-
CREATE INDEX ps_pk ON partsupp (ps_partkey ASC);
71-
CREATE INDEX ps_sk ON partsupp (ps_suppkey ASC);
72-
CREATE UNIQUE INDEX ps_pk_sk ON partsupp (ps_partkey ASC, ps_suppkey ASC);
73-
CREATE UNIQUE INDEX ps_sk_pk ON partsupp (ps_suppkey ASC, ps_partkey ASC);
7474

7575
CREATE TABLE customer (
7676
c_custkey integer NOT NULL,
@@ -84,8 +84,6 @@ CREATE TABLE customer (
8484
PRIMARY KEY (c_custkey),
8585
FOREIGN KEY (c_nationkey) REFERENCES nation (n_nationkey) ON DELETE CASCADE
8686
);
87-
CREATE UNIQUE INDEX c_ck ON customer (c_custkey ASC);
88-
CREATE INDEX c_nk ON customer (c_nationkey ASC);
8987

9088
CREATE TABLE orders (
9189
o_orderkey integer NOT NULL,
@@ -100,9 +98,6 @@ CREATE TABLE orders (
10098
PRIMARY KEY (o_orderkey),
10199
FOREIGN KEY (o_custkey) REFERENCES customer (c_custkey) ON DELETE CASCADE
102100
);
103-
CREATE UNIQUE INDEX o_ok ON orders (o_orderkey ASC);
104-
CREATE INDEX o_ck ON orders (o_custkey ASC);
105-
CREATE INDEX o_od ON orders (o_orderdate ASC);
106101

107102
CREATE TABLE lineitem (
108103
l_orderkey integer NOT NULL,
@@ -125,14 +120,6 @@ CREATE TABLE lineitem (
125120
FOREIGN KEY (l_orderkey) REFERENCES orders (o_orderkey) ON DELETE CASCADE,
126121
FOREIGN KEY (l_partkey, l_suppkey) REFERENCES partsupp (ps_partkey, ps_suppkey) ON DELETE CASCADE
127122
);
128-
CREATE INDEX l_ok ON lineitem (l_orderkey ASC);
129-
CREATE INDEX l_pk ON lineitem (l_partkey ASC);
130-
CREATE INDEX l_sk ON lineitem (l_suppkey ASC);
131-
CREATE INDEX l_sd ON lineitem (l_shipdate ASC);
132-
CREATE INDEX l_cd ON lineitem (l_commitdate ASC);
133-
CREATE INDEX l_rd ON lineitem (l_receiptdate ASC);
134-
CREATE INDEX l_pk_sk ON lineitem (l_partkey ASC, l_suppkey ASC);
135-
CREATE INDEX l_sk_pk ON lineitem (l_suppkey ASC, l_partkey ASC);
136123

137124
SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
138-
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
125+
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
This script runs after TPCH table creation and data loading.
3+
Deferring index creation to this script improves overall load performance by approximately 30%.
4+
This script is referenced by the <afterload> parameter in
5+
config/mysql/sample_tpch_config.xml.
6+
*/
7+
CREATE UNIQUE INDEX r_rk ON region (r_regionkey ASC);
8+
CREATE UNIQUE INDEX n_nk ON nation (n_nationkey ASC);
9+
CREATE INDEX n_rk ON nation (n_regionkey ASC);
10+
CREATE UNIQUE INDEX p_pk ON part (p_partkey ASC);
11+
CREATE UNIQUE INDEX s_sk ON supplier (s_suppkey ASC);
12+
CREATE INDEX s_nk ON supplier (s_nationkey ASC);
13+
CREATE INDEX ps_pk ON partsupp (ps_partkey ASC);
14+
CREATE INDEX ps_sk ON partsupp (ps_suppkey ASC);
15+
CREATE UNIQUE INDEX ps_pk_sk ON partsupp (ps_partkey ASC, ps_suppkey ASC);
16+
CREATE UNIQUE INDEX ps_sk_pk ON partsupp (ps_suppkey ASC, ps_partkey ASC);
17+
CREATE UNIQUE INDEX c_ck ON customer (c_custkey ASC);
18+
CREATE INDEX c_nk ON customer (c_nationkey ASC);
19+
CREATE UNIQUE INDEX o_ok ON orders (o_orderkey ASC);
20+
CREATE INDEX o_ck ON orders (o_custkey ASC);
21+
CREATE INDEX o_od ON orders (o_orderdate ASC);
22+
CREATE INDEX l_ok ON lineitem (l_orderkey ASC);
23+
CREATE INDEX l_pk ON lineitem (l_partkey ASC);
24+
CREATE INDEX l_sk ON lineitem (l_suppkey ASC);
25+
CREATE INDEX l_sd ON lineitem (l_shipdate ASC);
26+
CREATE INDEX l_cd ON lineitem (l_commitdate ASC);
27+
CREATE INDEX l_rd ON lineitem (l_receiptdate ASC);
28+
CREATE INDEX l_pk_sk ON lineitem (l_partkey ASC, l_suppkey ASC);
29+
CREATE INDEX l_sk_pk ON lineitem (l_suppkey ASC, l_partkey ASC);

0 commit comments

Comments
 (0)