Commit dc2c12d

Merge pull request #10588 from everpcpc/fix-tpch
chore(ci): add script to load tpch100
2 parents 12f187c + 4a6420a commit dc2c12d

6 files changed: +180 -11 lines

.github/actions/create_check_status/action.yml

Lines changed: 7 additions & 0 deletions

@@ -16,6 +16,9 @@ inputs:
   conclusion:
     description: "check conclusion, Can be one of: action_required, cancelled, failure, neutral, success, skipped, stale, timed_out"
     required: false
+  run_id:
+    description: "external id to associate with the check run"
+    required: false
   title:
     description: "check title"
     required: true
@@ -35,6 +38,7 @@ runs:
        param_name: ${{ inputs.name }}
        param_status: ${{ inputs.status }}
        param_conclusion: ${{ inputs.conclusion }}
+       param_run_id: ${{ inputs.run_id }}
        param_title: ${{ inputs.title }}
        param_summary: ${{ inputs.summary }}
        param_url: https://github.com/datafuselabs/databend/actions/runs/${{ github.run_id }}
@@ -58,4 +62,7 @@ runs:
          if (process.env.param_status === 'completed') {
            data.conclusion = process.env.param_conclusion;
          }
+         if (process.env.param_run_id) {
+           data.external_id = process.env.param_run_id;
+         }
          await github.rest.checks.create(data);
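
The new run_id input is forwarded as param_run_id and, when set, stored as external_id on the check run that github.rest.checks.create creates. For reference, a minimal bash sketch of the equivalent raw Checks API call, assuming a token allowed to write checks; the owner/repo, sha, and run id values below are placeholders, not taken from this commit:

#!/bin/bash
# Sketch only: the action itself goes through actions/github-script; this just
# shows the same payload sent straight to the REST endpoint.
OWNER=datafuselabs
REPO=databend
HEAD_SHA=0000000000000000000000000000000000000000   # placeholder PR head commit
RUN_ID=1234567890                                    # placeholder external run id

curl -sS -X POST \
  -H "Authorization: Bearer $GITHUB_TOKEN" \
  -H "Accept: application/vnd.github+json" \
  "https://api.github.com/repos/$OWNER/$REPO/check-runs" \
  -d @- <<JSON
{
  "name": "Benchmark Local / hits",
  "head_sha": "$HEAD_SHA",
  "status": "queued",
  "external_id": "$RUN_ID",
  "output": { "title": "Benchmark Local for hits", "summary": "status: **queued**" }
}
JSON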

.github/workflows/benchmark.yml

Lines changed: 2 additions & 1 deletion

@@ -41,9 +41,10 @@ jobs:
      - name: Generate benchmark metadata
        run: |
          echo '{}' > metadata.json
-         jq ".sha = \"${{ github.event.pull_request.head.sha }}\"" <metadata.json >metadata.json.tmp && mv metadata.json.tmp metadata.json
+         jq ".sha = \"${{ github.sha }}\"" <metadata.json >metadata.json.tmp && mv metadata.json.tmp metadata.json
          jq ".run_id = \"${{ github.run_id }}\"" <metadata.json >metadata.json.tmp && mv metadata.json.tmp metadata.json
          jq ".pr_id = \"${{ github.event.pull_request.number }}\"" <metadata.json >metadata.json.tmp && mv metadata.json.tmp metadata.json
+         jq ".head_sha = \"${{ github.event.pull_request.head.sha }}\"" <metadata.json >metadata.json.tmp && mv metadata.json.tmp metadata.json
      - name: Upload artifact
        uses: actions/upload-artifact@v3
        with:
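
With this change the artifact records both commits: .sha now holds the merge commit that CI built (github.sha), while the PR head commit moves to .head_sha. A small sketch of what the resulting metadata.json looks like, using made-up values in place of the real GitHub contexts:

#!/bin/bash
# Placeholder values; in the workflow these come from github.sha, github.run_id,
# and github.event.pull_request.* contexts.
echo '{}' > metadata.json
jq '.sha = "1111111"'       <metadata.json >metadata.json.tmp && mv metadata.json.tmp metadata.json  # merge commit
jq '.run_id = "4242424242"' <metadata.json >metadata.json.tmp && mv metadata.json.tmp metadata.json
jq '.pr_id = "12345"'       <metadata.json >metadata.json.tmp && mv metadata.json.tmp metadata.json
jq '.head_sha = "2222222"'  <metadata.json >metadata.json.tmp && mv metadata.json.tmp metadata.json  # PR head commit
jq -c . metadata.json
# {"sha":"1111111","run_id":"4242424242","pr_id":"12345","head_sha":"2222222"}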

.github/workflows/dev-linux.yml

Lines changed: 3 additions & 0 deletions

@@ -11,6 +11,9 @@ on:
      - "docker/**"
      - "scripts/setup/**"
      - ".devcontainer/**"
+  merge_group:
+    types:
+      - checks_requested

 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}

.github/workflows/trusted-benchmark.yml

Lines changed: 33 additions & 10 deletions

@@ -27,6 +27,7 @@ jobs:
    runs-on: [self-hosted, X64, Linux, dev]
    outputs:
      sha: ${{ steps.metadata.outputs.sha }}
+     head_sha: ${{ steps.metadata.outputs.head_sha }}
      run_id: ${{ steps.metadata.outputs.run_id }}
      source: ${{ steps.metadata.outputs.source }}
      source_id: ${{ steps.metadata.outputs.source_id }}
@@ -42,12 +43,14 @@ jobs:
            workflow_run)
              gh run download ${{ github.event.workflow_run.id }} --name benchmark-metadata
              echo "sha=$(jq -r '.sha' metadata.json)" >> $GITHUB_OUTPUT
+             echo "head_sha=$(jq -r '.head_sha' metadata.json)" >> $GITHUB_OUTPUT
              echo "run_id=$(jq -r '.run_id' metadata.json)" >> $GITHUB_OUTPUT
              echo "source=pr" >> $GITHUB_OUTPUT
              echo "source_id=$(jq -r '.pr_id' metadata.json)" >> $GITHUB_OUTPUT
              ;;
            workflow_call)
              echo "sha=${{ github.sha }}" >> $GITHUB_OUTPUT
+             echo "head_sha=${{ github.sha }}" >> $GITHUB_OUTPUT
              echo "run_id=${{ github.run_id }}" >> $GITHUB_OUTPUT
              echo "source=release" >> $GITHUB_OUTPUT
              echo "source_id=${{ github.event.inputs.tag }}" >> $GITHUB_OUTPUT
@@ -58,49 +61,61 @@ jobs:
              ;;
          esac
      - uses: ./.github/actions/create_check_status
+       if: steps.metadata.outputs.source == 'pr'
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
-         sha: ${{ steps.metadata.outputs.sha }}
+         sha: ${{ steps.metadata.outputs.head_sha }}
+         run_id: ${{ steps.metadata.outputs.run_id }}
          name: 'Benchmark Local / hits'
          status: queued
          title: "Benchmark Local for hits"
          summary: "status: **queued**"
      - uses: ./.github/actions/create_check_status
+       if: steps.metadata.outputs.source == 'pr'
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
-         sha: ${{ steps.metadata.outputs.sha }}
+         sha: ${{ steps.metadata.outputs.head_sha }}
+         run_id: ${{ steps.metadata.outputs.run_id }}
          name: 'Benchmark Local / tpch'
          status: queued
          title: "Benchmark Local for tpch"
          summary: "status: **queued**"
      - uses: ./.github/actions/create_check_status
+       if: steps.metadata.outputs.source == 'pr'
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
-         sha: ${{ steps.metadata.outputs.sha }}
+         sha: ${{ steps.metadata.outputs.head_sha }}
+         run_id: ${{ steps.metadata.outputs.run_id }}
          name: 'Benchmark Cloud / hits(Medium)'
          status: queued
          title: "Benchmark Cloud for hits with Medium warehouse"
          summary: "status: **queued**"
      - uses: ./.github/actions/create_check_status
+       if: steps.metadata.outputs.source == 'pr'
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
-         sha: ${{ steps.metadata.outputs.sha }}
+         sha: ${{ steps.metadata.outputs.head_sha }}
+         run_id: ${{ steps.metadata.outputs.run_id }}
          name: 'Benchmark Cloud / tpch(Medium)'
          status: queued
          title: "Benchmark Cloud for tpch with Medium warehouse"
          summary: "status: **queued**"
      - uses: ./.github/actions/create_check_status
+       if: steps.metadata.outputs.source == 'pr'
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
-         sha: ${{ steps.metadata.outputs.sha }}
+         sha: ${{ steps.metadata.outputs.head_sha }}
+         run_id: ${{ steps.metadata.outputs.run_id }}
          name: 'Benchmark Cloud / hits(Large)'
          status: queued
          title: "Benchmark Cloud for hits with Large warehouse"
          summary: "status: **queued**"
      - uses: ./.github/actions/create_check_status
+       if: steps.metadata.outputs.source == 'pr'
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
-         sha: ${{ steps.metadata.outputs.sha }}
+         sha: ${{ steps.metadata.outputs.head_sha }}
+         run_id: ${{ steps.metadata.outputs.run_id }}
          name: 'Benchmark Cloud / tpch(Large)'
          status: queued
          title: "Benchmark Cloud for tpch with Large warehouse"
@@ -145,10 +160,12 @@ jobs:
          databend-query --version
          databend-meta --version
      - name: Set github check status to pending
+       if: steps.metadata.outputs.source == 'pr'
        uses: ./.github/actions/create_check_status
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
-         sha: ${{ needs.metadata.outputs.sha }}
+         sha: ${{ needs.metadata.outputs.head_sha }}
+         run_id: ${{ steps.metadata.outputs.run_id }}
          name: 'Benchmark Local / ${{ matrix.dataset }}'
          status: in_progress
          title: "Benchmark Local for ${{ matrix.dataset }}"
@@ -162,10 +179,12 @@ jobs:
          source: ${{ needs.metadata.outputs.source }}
          source_id: ${{ needs.metadata.outputs.source_id }}
      - name: Set github check status to completed
+       if: steps.metadata.outputs.source == 'pr'
        uses: ./.github/actions/create_check_status
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
-         sha: ${{ needs.metadata.outputs.sha }}
+         sha: ${{ needs.metadata.outputs.head_sha }}
+         run_id: ${{ steps.metadata.outputs.run_id }}
          name: 'Benchmark Local / ${{ matrix.dataset }}'
          status: completed
          conclusion: ${{ job.status }}
@@ -242,10 +261,12 @@ jobs:
      - uses: actions/checkout@v3
      - uses: ./.github/actions/setup_bendsql
      - name: Set github check status to pending
+       if: steps.metadata.outputs.source == 'pr'
        uses: ./.github/actions/create_check_status
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
-         sha: ${{ needs.metadata.outputs.sha }}
+         sha: ${{ needs.metadata.outputs.head_sha }}
+         run_id: ${{ steps.metadata.outputs.run_id }}
          name: 'Benchmark Cloud / ${{ matrix.dataset }}(${{ matrix.size }})'
          status: in_progress
          title: "Benchmark Cloud for ${{ matrix.dataset }} with ${{ matrix.size }} warehouse"
@@ -265,10 +286,12 @@ jobs:
          cloud_org: ${{ secrets.BENCHMARK_CLOUD_ORG }}
          cloud_endpoint: ${{ secrets.BENCHMARK_CLOUD_ENDPOINT }}
      - name: Set github check status to pending
+       if: steps.metadata.outputs.source == 'pr'
        uses: ./.github/actions/create_check_status
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
-         sha: ${{ needs.metadata.outputs.sha }}
+         sha: ${{ needs.metadata.outputs.head_sha }}
+         run_id: ${{ steps.metadata.outputs.run_id }}
          name: 'Benchmark Cloud / ${{ matrix.dataset }}(${{ matrix.size }})'
          status: completed
          conclusion: ${{ job.status }}
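
The metadata job now exposes both values: downstream jobs report check status against head_sha (the PR head commit the check is attached to) while sha keeps identifying the merge commit that was built, and every check-status step is skipped for release runs via the source == 'pr' guard. A condensed bash sketch of that dispatch, assuming a metadata.json shaped like the one produced by benchmark.yml above; outputs are simply echoed instead of appended to $GITHUB_OUTPUT:

#!/bin/bash
# Stand-in for the workflow's metadata step; "workflow_run" replaces
# ${{ github.event_name }}.
event_name=workflow_run
case "$event_name" in
  workflow_run)   # PR benchmark: identities come from the uploaded artifact
    echo "sha=$(jq -r '.sha' metadata.json)"
    echo "head_sha=$(jq -r '.head_sha' metadata.json)"
    echo "source=pr"
    echo "source_id=$(jq -r '.pr_id' metadata.json)"
    ;;
  workflow_call)  # release benchmark: no PR, so sha and head_sha coincide
    echo "source=release"
    ;;
esac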
Lines changed: 135 additions & 0 deletions

@@ -0,0 +1,135 @@
+#!/bin/bash
+
+set -e
+
+cat <<SQL | bendsql query
+select version();
+SQL
+
+for t in customer lineitem nation orders partsupp part region supplier; do
+    echo "DROP TABLE IF EXISTS $t;" | bendsql query
+done
+
+cat <<SQL | bendsql query
+CREATE TABLE IF NOT EXISTS customer (
+    c_custkey BIGINT not null,
+    c_name STRING not null,
+    c_address STRING not null,
+    c_nationkey INTEGER not null,
+    c_phone STRING not null,
+    c_acctbal DECIMAL(15, 2) not null,
+    c_mktsegment STRING not null,
+    c_comment STRING not null
+) CLUSTER BY (c_custkey);
+SQL
+
+cat <<SQL | bendsql query
+CREATE TABLE IF NOT EXISTS lineitem (
+    l_orderkey BIGINT not null,
+    l_partkey BIGINT not null,
+    l_suppkey BIGINT not null,
+    l_linenumber BIGINT not null,
+    l_quantity DECIMAL(15, 2) not null,
+    l_extendedprice DECIMAL(15, 2) not null,
+    l_discount DECIMAL(15, 2) not null,
+    l_tax DECIMAL(15, 2) not null,
+    l_returnflag STRING not null,
+    l_linestatus STRING not null,
+    l_shipdate DATE not null,
+    l_commitdate DATE not null,
+    l_receiptdate DATE not null,
+    l_shipinstruct STRING not null,
+    l_shipmode STRING not null,
+    l_comment STRING not null
+) CLUSTER BY(l_shipdate, l_orderkey);
+SQL
+
+# create tpch tables
+cat <<SQL | bendsql query
+CREATE TABLE IF NOT EXISTS nation (
+    n_nationkey INTEGER not null,
+    n_name STRING not null,
+    n_regionkey INTEGER not null,
+    n_comment STRING
+) CLUSTER BY (n_nationkey);
+SQL
+
+cat <<SQL | bendsql query
+CREATE TABLE IF NOT EXISTS orders (
+    o_orderkey BIGINT not null,
+    o_custkey BIGINT not null,
+    o_orderstatus STRING not null,
+    o_totalprice DECIMAL(15, 2) not null,
+    o_orderdate DATE not null,
+    o_orderpriority STRING not null,
+    o_clerk STRING not null,
+    o_shippriority INTEGER not null,
+    o_comment STRING not null
+) CLUSTER BY (o_orderkey, o_orderdate);
+SQL
+
+cat <<SQL | bendsql query
+CREATE TABLE IF NOT EXISTS partsupp (
+    ps_partkey BIGINT not null,
+    ps_suppkey BIGINT not null,
+    ps_availqty BIGINT not null,
+    ps_supplycost DECIMAL(15, 2) not null,
+    ps_comment STRING not null
+) CLUSTER BY (ps_partkey);
+SQL
+
+cat <<SQL | bendsql query
+CREATE TABLE IF NOT EXISTS part (
+    p_partkey BIGINT not null,
+    p_name STRING not null,
+    p_mfgr STRING not null,
+    p_brand STRING not null,
+    p_type STRING not null,
+    p_size INTEGER not null,
+    p_container STRING not null,
+    p_retailprice DECIMAL(15, 2) not null,
+    p_comment STRING not null
+) CLUSTER BY (p_partkey);
+SQL
+
+cat <<SQL | bendsql query
+CREATE TABLE IF NOT EXISTS region (
+    r_regionkey INTEGER not null,
+    r_name STRING not null,
+    r_comment STRING
+) CLUSTER BY (r_regionkey);
+SQL
+
+cat <<SQL | bendsql query
+CREATE TABLE IF NOT EXISTS supplier (
+    s_suppkey BIGINT not null,
+    s_name STRING not null,
+    s_address STRING not null,
+    s_nationkey INTEGER not null,
+    s_phone STRING not null,
+    s_acctbal DECIMAL(15, 2) not null,
+    s_comment STRING not null
+) CLUSTER BY (s_suppkey);
+SQL
+
+for t in nation region; do
+    echo "loading into $t ..."
+    cat <<SQL | bendsql query
+COPY INTO $t FROM 's3://repo.databend.rs/tpch100/${t}.tbl'
+    credentials=(aws_key_id='$REPO_ACCESS_KEY_ID' aws_secret_key='$REPO_SECRET_ACCESS_KEY')
+    file_format=(type='CSV' field_delimiter='|' record_delimiter='\\n' skip_header=1);
+ANALYZE TABLE "${t}";
+SELECT count(*) as count_${t} FROM "${t}";
+SQL
+done
+
+for t in customer lineitem orders partsupp part supplier; do
+    echo "loading into $t ..."
+    cat <<SQL | bendsql query
+COPY INTO $t FROM 's3://repo.databend.rs/tpch100/${t}/'
+    credentials=(aws_key_id='$REPO_ACCESS_KEY_ID' aws_secret_key='$REPO_SECRET_ACCESS_KEY') pattern ='${t}.tbl.*'
+    file_format=(type='CSV' field_delimiter='|' record_delimiter='\\n' skip_header=1);
+ANALYZE TABLE "${t}";
+SELECT count(*) as count_${t} FROM "${t}";
+SQL
+done
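
Taken together, the script drops any existing TPC-H tables, recreates the eight clustered tables, and bulk-loads them from s3://repo.databend.rs/tpch100/ with COPY INTO, so the only inputs it needs are the two repo credentials it references plus a bendsql session pointed at the target warehouse. A hypothetical invocation; the diff header above does not show the script's path, so the file name below is illustrative:

#!/bin/bash
# Assumes bendsql has already been configured to reach the target Databend
# instance (e.g. by the setup_bendsql action used in the workflows above).
export REPO_ACCESS_KEY_ID='AKIA...'   # read by the COPY INTO statements
export REPO_SECRET_ACCESS_KEY='...'   # never hard-code real credentials
./load_tpch100.sh                     # illustrative file name, not from the diff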
