Skip to content

Commit 82c990d

Browse files
authored
CI checks on benchmark histograms and associated benchmark fixups (#268)
As previously noted in #122 the Wikipedia benchmark has a bug that results in most transactions getting aborted. The fix developed in #173 by @apavlo seems to mostly fix it (though there are still some duplicate key errors generated by the benchmark's AddWatchPage procedure). This PR pulls in those changes plus some additional ones (below) and additionally adds some CI checks on the json histogram outputs so that any benchmark the generates errors more than 1% of the completed transactions should be considered a failure (previously a CI failure was reported only if java exited non-zero, which doesn't happen when the many transactions fail, only when an unexpected exception occurs). The 1% value is just an initial threshold and can be adjusted both on a per database and a per benchmark basis. We do this in several places (e.g. TATP benchmark) in order to keep the changes required for this PR somewhat smaller and leave it for future work to improve those benchmarks. To address the Wikipedia failures in some databases, we expand on auto-increment column support to SqlServer, add an auto-increment DDL schema for hsqldb for unit test purposes. For others, we introduce improved schemas and/or dialects to fix query errors. Note: in the case of sqlite, which currently lacks support for things like native `SLEEP` or `TABLOCK` we leave a more simplified version of the query for now and mark it for future work to improve.
1 parent ef5011d commit 82c990d

30 files changed

+987
-100
lines changed

.github/workflows/maven.yml

Lines changed: 60 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ on:
3434
env:
3535
POM_VERSION: 2021-SNAPSHOT
3636
JAVA_VERSION: 17
37+
ERRORS_THRESHOLD: 0.01
3738

3839
jobs:
3940
compile-and-test:
@@ -85,6 +86,7 @@ jobs:
8586
needs: package-and-upload
8687
runs-on: ubuntu-latest
8788
strategy:
89+
fail-fast: false
8890
matrix:
8991
# BROKEN: tpch
9092
benchmark: [ 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'tpcc', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
@@ -110,7 +112,18 @@ jobs:
110112

111113
- name: Run benchmark
112114
run: |
113-
java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlite/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true
115+
java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlite/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
116+
# FIXME: Reduce the error rate so we don't need these overrides.
117+
if [ ${{matrix.benchmark}} == auctionmark ]; then
118+
ERRORS_THRESHOLD=0.02
119+
elif [ ${{matrix.benchmark}} == resourcestresser ]; then
120+
ERRORS_THRESHOLD=0.05
121+
elif [ ${{matrix.benchmark}} == smallbank ]; then
122+
ERRORS_THRESHOLD=0.03
123+
elif [ ${{matrix.benchmark}} == tatp ]; then
124+
ERRORS_THRESHOLD=0.05
125+
fi
126+
./scripts/check_histogram_results.sh results/histograms.json $ERRORS_THRESHOLD
114127
115128
## ----------------------------------------------------------------------------------
116129
## MARIADB
@@ -119,6 +132,7 @@ jobs:
119132
needs: package-and-upload
120133
runs-on: ubuntu-latest
121134
strategy:
135+
fail-fast: false
122136
matrix:
123137
benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'tpcc', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
124138
services:
@@ -161,7 +175,14 @@ jobs:
161175
MARIADB_PORT: ${{ job.services.mariadb.ports[3306] }}
162176
run: |
163177
mysql -h127.0.0.1 -P$MARIADB_PORT -uadmin -ppassword -e "DROP DATABASE IF EXISTS benchbase; CREATE DATABASE benchbase"
164-
java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/mariadb/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true
178+
java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/mariadb/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
179+
# FIXME: Reduce the error rate so we don't need these overrides.
180+
if [ ${{matrix.benchmark}} == auctionmark ]; then
181+
ERRORS_THRESHOLD=0.02
182+
elif [ ${{matrix.benchmark}} == tatp ]; then
183+
ERRORS_THRESHOLD=0.05
184+
fi
185+
./scripts/check_histogram_results.sh results/histograms.json $ERRORS_THRESHOLD
165186
166187
## ----------------------------------------------------------------------------------
167188
## MYSQL
@@ -170,6 +191,7 @@ jobs:
170191
needs: package-and-upload
171192
runs-on: ubuntu-latest
172193
strategy:
194+
fail-fast: false
173195
matrix:
174196
benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'tpcc', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
175197
services:
@@ -212,7 +234,14 @@ jobs:
212234
MYSQL_PORT: ${{ job.services.mysql.ports[3306] }}
213235
run: |
214236
mysql -h127.0.0.1 -P$MYSQL_PORT -uadmin -ppassword -e "DROP DATABASE IF EXISTS benchbase; CREATE DATABASE benchbase"
215-
java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/mysql/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true
237+
java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/mysql/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
238+
# FIXME: Reduce the error rate so we don't need these overrides.
239+
if [ ${{matrix.benchmark}} == auctionmark ]; then
240+
ERRORS_THRESHOLD=0.02
241+
elif [ ${{matrix.benchmark}} == tatp ]; then
242+
ERRORS_THRESHOLD=0.05
243+
fi
244+
./scripts/check_histogram_results.sh results/histograms.json $ERRORS_THRESHOLD
216245
217246
## ----------------------------------------------------------------------------------
218247
## POSTGRESQL
@@ -221,6 +250,7 @@ jobs:
221250
needs: package-and-upload
222251
runs-on: ubuntu-latest
223252
strategy:
253+
fail-fast: false
224254
matrix:
225255
benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'tpcc', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
226256
services:
@@ -261,7 +291,14 @@ jobs:
261291
run: |
262292
PGPASSWORD=password dropdb -h localhost -U admin benchbase --if-exists
263293
PGPASSWORD=password createdb -h localhost -U admin benchbase
264-
java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/postgres/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true
294+
java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/postgres/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
295+
# FIXME: Reduce the error rate so we don't need these overrides.
296+
if [ ${{matrix.benchmark}} == auctionmark ]; then
297+
ERRORS_THRESHOLD=0.02
298+
elif [ ${{matrix.benchmark}} == tatp ]; then
299+
ERRORS_THRESHOLD=0.05
300+
fi
301+
./scripts/check_histogram_results.sh results/histograms.json $ERRORS_THRESHOLD
265302
266303
## ----------------------------------------------------------------------------------
267304
## COCKROACHDB
@@ -270,6 +307,7 @@ jobs:
270307
needs: package-and-upload
271308
runs-on: ubuntu-latest
272309
strategy:
310+
fail-fast: false
273311
matrix:
274312
benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'tpcc', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
275313
services:
@@ -302,7 +340,15 @@ jobs:
302340

303341
- name: Run benchmark
304342
run: |
305-
java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/cockroachdb/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true
343+
java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/cockroachdb/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
344+
# FIXME: Reduce the error rate so we don't need these overrides.
345+
# FIXME: Reduce the error rate so we don't need these overrides.
346+
if [ ${{matrix.benchmark}} == auctionmark ]; then
347+
ERRORS_THRESHOLD=0.02
348+
elif [ ${{matrix.benchmark}} == tatp ]; then
349+
ERRORS_THRESHOLD=0.05
350+
fi
351+
./scripts/check_histogram_results.sh results/histograms.json $ERRORS_THRESHOLD
306352
307353
## ----------------------------------------------------------------------------------
308354
## MSSQL
@@ -311,6 +357,7 @@ jobs:
311357
needs: package-and-upload
312358
runs-on: ubuntu-latest
313359
strategy:
360+
fail-fast: false
314361
matrix:
315362
# TODO: add more benchmarks
316363
#benchmark: [ 'auctionmark', 'epinions', 'hyadapt', 'noop', 'otmetrics', 'resourcestresser', 'seats', 'sibench', 'smallbank', 'tatp', 'tpcc', 'tpch', 'twitter', 'voter', 'wikipedia', 'ycsb' ]
@@ -375,7 +422,12 @@ jobs:
375422
- name: Run benchmark
376423
# Note: user/pass should match those used in sample configs.
377424
run: |
378-
java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlserver/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true
425+
java -jar benchbase.jar -b ${{matrix.benchmark}} -c config/sqlserver/sample_${{matrix.benchmark}}_config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
426+
# FIXME: Reduce the error rate so we don't need these overrides.
427+
if [ ${{matrix.benchmark}} == tatp ]; then
428+
ERRORS_THRESHOLD=0.05
429+
fi
430+
./scripts/check_histogram_results.sh results/histograms.json $ERRORS_THRESHOLD
379431
380432
## ----------------------------------------------------------------------------------
381433
## Docker Build Test Publish
@@ -447,9 +499,10 @@ jobs:
447499
mkdir -p results
448500
docker run --rm --name benchbase-postgres --network "$docker_network" \
449501
--env BENCHBASE_PROFILE=postgres -v /tmp/config.xml:/tmp/config.xml -v "$PWD/results:/benchbase/results" \
450-
"$image" -b "$benchmark" -c /tmp/config.xml --create=true --load=true --execute=true
502+
"$image" -b "$benchmark" -c /tmp/config.xml --create=true --load=true --execute=true --json-histograms results/histograms.json
451503
# Test that the results files were produced.
452504
ls results/${benchmark}_*.csv
505+
./scripts/check_histogram_results.sh results/histograms.json $ERRORS_THRESHOLD
453506
done
454507
# Publish the docker image if the build/test was successful.
455508
# Only do this with approved PRs and if the login secrets are available.

docker/build-run-benchmark-with-docker.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,4 @@ SKIP_TESTS=${SKIP_TESTS:-true} EXTRA_DOCKER_ARGS="--network=host $EXTRA_DOCKER_A
4444
$CREATE_DB_ARGS --execute=true \
4545
--sample 1 --interval-monitor 1000 \
4646
--json-histograms results/histograms.json
47+
./scripts/check_histogram_results.sh results/historgrams.json

docker/mysql-latest/docker-compose.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@ services:
88
image: mysql:latest
99
command: --max-connections=500
1010
environment:
11-
MYSQL_USER: admin
12-
MYSQL_PASSWORD: password
1311
MYSQL_ROOT_PASSWORD: password
1412
MYSQL_USER: admin
1513
MYSQL_PASSWORD: password

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@
231231
<dependency>
232232
<groupId>org.hsqldb</groupId>
233233
<artifactId>hsqldb</artifactId>
234-
<version>2.6.1</version>
234+
<version>2.7.1</version>
235235
</dependency>
236236

237237
<dependency>

scripts/check_histogram_results.sh

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/bin/bash
2+
3+
# A simple script to check the error rate in the --json-histograms file results.
4+
5+
set -eu
6+
set -o pipefail
7+
8+
# Move to the root of the repository.
9+
scriptdir=$(dirname "$(readlink -f "$0")")
10+
cd "$scriptdir/.."
11+
12+
if ! type jq >/dev/null 2>&1; then
13+
echo "ERROR: Missing jq utility. Please install it (e.g. using apt-get)." >&2
14+
exit 1
15+
fi
16+
17+
results_json="${1:-results/histograms.json}"
18+
threshold="${2:-0.01}"
19+
20+
echo "INFO: Checking that error results in $results_json are below $threshold"
21+
22+
if ! [ -s "$results_json" ]; then
23+
echo "ERROR: Missing or empty results file: $results_json" >&2
24+
exit 1
25+
fi
26+
27+
# First transform the histograms into a single object with aggregate count of error and completed samples.
28+
summary_json=$(cat "$results_json" | jq -e '
29+
to_entries
30+
| {
31+
"completed_samples": ( .[] | select(.key == "completed") | .value.NUM_SAMPLES ),
32+
"errored_samples": ( [ .[] | select(.key != "completed" ) | .value.NUM_SAMPLES ] | add )
33+
}
34+
| .error_rate = (.errored_samples / .completed_samples)'
35+
)
36+
37+
# Print it out for debugging.
38+
echo "$summary_json" | jq -e .
39+
40+
# Check the value of the error rate.
41+
if ! echo "$summary_json" | jq -e '(.error_rate < '$threshold')' >/dev/null; then
42+
echo "ERROR: error rate is too high"
43+
exit 1
44+
fi
45+
# else
46+
echo 'OK!'
47+
exit 0

src/main/java/com/oltpbenchmark/api/Loader.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,14 +119,20 @@ public void unload(Connection conn, AbstractCatalog catalog) throws SQLException
119119

120120
protected void updateAutoIncrement(Connection conn, Column catalog_col, int value) throws SQLException {
121121
String sql = null;
122-
if (getDatabaseType() == DatabaseType.POSTGRES) {
123-
String seqName = SQLUtil.getSequenceName(getDatabaseType(), catalog_col);
124-
125-
sql = String.format("SELECT setval(%s, %d)", seqName.toLowerCase(), value);
122+
String seqName = SQLUtil.getSequenceName(conn, getDatabaseType(), catalog_col);
123+
DatabaseType dbType = getDatabaseType();
124+
if (seqName != null) {
125+
if (dbType == DatabaseType.POSTGRES) {
126+
sql = String.format("SELECT setval('%s', %d)", seqName.toLowerCase(), value);
127+
}
128+
else if (dbType == DatabaseType.SQLSERVER || dbType == DatabaseType.SQLAZURE) {
129+
sql = String.format("ALTER SEQUENCE [%s] RESTART WITH %d", seqName, value);
130+
}
126131
}
132+
127133
if (sql != null) {
128134
if (LOG.isDebugEnabled()) {
129-
LOG.debug(String.format("Updating %s auto-increment counter with value '%d'", catalog_col.getName(), value));
135+
LOG.debug(String.format("Updating %s auto-increment counter %s with value '%d'", catalog_col.getName(), seqName, value));
130136
}
131137
try (Statement stmt = conn.createStatement()) {
132138
boolean result = stmt.execute(sql);

src/main/java/com/oltpbenchmark/api/Procedure.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,13 @@ public final PreparedStatement getPreparedStatementReturnKeys(Connection conn, S
109109

110110
// HACK: If the target system is Postgres, wrap the PreparedStatement in a special
111111
// one that fakes the getGeneratedKeys().
112-
if (is != null && (this.dbType == DatabaseType.POSTGRES || this.dbType == DatabaseType.COCKROACHDB)) {
112+
if (is != null && (
113+
this.dbType == DatabaseType.POSTGRES
114+
|| this.dbType == DatabaseType.COCKROACHDB
115+
|| this.dbType == DatabaseType.SQLSERVER
116+
|| this.dbType == DatabaseType.SQLAZURE
117+
)
118+
) {
113119
pStmt = new AutoIncrementPreparedStatement(this.dbType, conn.prepareStatement(stmt.getSQL()));
114120
}
115121
// Everyone else can use the regular getGeneratedKeys() method
@@ -121,7 +127,6 @@ else if (is != null) {
121127
pStmt = conn.prepareStatement(stmt.getSQL());
122128
}
123129

124-
125130
return (pStmt);
126131
}
127132

src/main/java/com/oltpbenchmark/benchmarks/tpch/procedures/GenericQuery.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import java.sql.PreparedStatement;
2727
import java.sql.ResultSet;
2828
import java.sql.SQLException;
29+
import java.sql.SQLSyntaxErrorException;
2930

3031
public abstract class GenericQuery extends Procedure {
3132

@@ -34,12 +35,18 @@ public abstract class GenericQuery extends Procedure {
3435
protected abstract PreparedStatement getStatement(Connection conn, RandomGenerator rand, double scaleFactor) throws SQLException;
3536

3637
public void run(Connection conn, RandomGenerator rand, double scaleFactor) throws SQLException {
37-
38-
try (PreparedStatement stmt = getStatement(conn, rand, scaleFactor); ResultSet rs = stmt.executeQuery()) {
39-
while (rs.next()) {
40-
//do nothing
38+
try (PreparedStatement stmt = getStatement(conn, rand, scaleFactor)) {
39+
try (ResultSet rs = stmt.executeQuery()) {
40+
while (rs.next()) {
41+
//do nothing
42+
}
43+
}
44+
catch (SQLSyntaxErrorException ex) {
45+
if (LOG.isDebugEnabled()) {
46+
LOG.debug(this.getClass().getName() + ": stmt: " + stmt.toString());
47+
}
48+
throw ex;
4149
}
4250
}
43-
4451
}
4552
}

0 commit comments

Comments
 (0)