@@ -1,4 +1,8 @@
- #! /bin/bash
+ #! /bin/bash -e
+
+ # cleanup
+ sudo ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
+ sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9

# docker
sudo rm /usr/share/keyrings/docker-archive-keyring.gpg
@@ -8,7 +12,7 @@ sudo apt update
sudo apt install -y docker-ce

# base
- sudo apt-get install -y postgresql-client curl wget apt-transport-https ca-certificates software-properties-common gnupg2
+ sudo apt-get install -y postgresql-client curl wget apt-transport-https ca-certificates software-properties-common gnupg2 parallel
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential

# ruby and fake S3
@@ -33,54 +37,60 @@ echo -e "[default]\nregion = none" > ~/.aws/config
echo -e "[default]\naws_access_key_id = none\naws_secret_access_key = none" > ~/.aws/credentials

# run fake S3
- ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | xargs -r kill -9
-
+ sudo ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
sudo rm -rf /mnt/fakes3_root
sudo mkdir -p /mnt/fakes3_root
sudo chmod a+rw /mnt/fakes3_root -R
fakes3 -r /mnt/fakes3_root -H 0.0.0.0 -p 4569 --license license.pdf > /dev/null 2>&1 &
+ sleep 10 # waiting for fake S3 to start

- # # download dataset
- # wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
- # gzip -d hits.tsv.gz
- # chmod 777 ~ hits.tsv
+ # download dataset
+ wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
+ gzip -d hits.tsv.gz
+ chmod 777 ~ hits.tsv

- # # convert dataset to csv
- # rm -f hits_part*.csv
- # curl https://clickhouse.com/ | sh
- # ./clickhouse local --query "SELECT * FROM 'hits.tsv' INTO OUTFILE 'hits.csv'"
- # rm hits.tsv
+ # convert dataset to csv
+ rm -f part_*.csv
+ curl https://clickhouse.com/ | sh
+ ./clickhouse local --query "SELECT * FROM 'hits.tsv' INTO OUTFILE 'hits.csv'"
+ rm hits.tsv

- # split -l 5000000 hits.csv part_
- # for file in part_*; do mv "$file" "${file}.csv"; done
+ # prepare digestible parts (5m rows each) of hits.csv
+ split -l 5000000 hits.csv part_
+ for file in part_*; do mv "$file" "${file}.csv"; done

- # upload dataset to fake S3 bucket
+ # upload dataset (prepared parts) to fake S3 bucket
aws s3 mb s3://my-new-bucket --endpoint-url http://localhost:4569

for file in part_*.csv; do
echo "Processing file: $file"

# copy the file to the S3 bucket
- aws s3 cp "./$file" s3://my-new-bucket --endpoint-url http://localhost:4569
+ aws s3 cp "./$file" s3://my-new-bucket --endpoint-url http://localhost:4569 > /dev/null 2>&1

- # clean-up tmp pars left after upload
- for key in $(aws s3api list-objects --bucket my-new-bucket --query "Contents[?contains(Key, '_${file}_')].Key" --output text --endpoint-url http://localhost:4569); do
- aws s3api delete-object --bucket my-new-bucket --key "$key" --endpoint-url http://localhost:4569
- done
+ # clean-up tmp parts left after upload
+ TMPPARTS=$(aws s3api list-objects --bucket my-new-bucket --query "Contents[?contains(Key, '_${file}_')].Key" --output text --endpoint-url http://localhost:4569)
+ echo $TMPPARTS | tr ' ' '\n' | grep . | parallel -j16 aws s3api delete-object --bucket my-new-bucket --key {} --endpoint-url http://localhost:4569

done

# get and configure Oxla image
- sudo docker run --rm -it -p 5432:5432 --name oxlacontainer public.ecr.aws/oxla/release:latest &
- sudo docker exec -it oxlacontainer /bin/bash -c "sed -i 's#endpoint: \"\"#endpoint: \"http://localhost:4569\"#g' oxla/default_config.yml"
- sudo docker exec -it oxlacontainer /bin/bash -c "sed -i 's#endpoint:.*#endpoint: '\''http://localhost:4569'\''#g' oxla/startup_config/config.yml"
+ echo "Install and run Oxla."
+
+ sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
+
+ sudo docker run --rm -p 5432:5432 --name oxlacontainer public.ecr.aws/oxla/release:latest > /dev/null 2>&1 &
+ sleep 10 # waiting for container start and db initialisation (leader election, etc.)
+
+ sudo docker exec oxlacontainer /bin/bash -c "sed -i 's#endpoint: \"\"#endpoint: \"http://localhost:4569\"#g' oxla/default_config.yml"
+ sudo docker exec oxlacontainer /bin/bash -c "sed -i 's#endpoint:.*#endpoint: '\''http://localhost:4569'\''#g' oxla/startup_config/config.yml"
+ sudo docker rmi oxla-configured-image:latest > /dev/null 2>&1 || echo "" > /dev/null
sudo docker commit oxlacontainer oxla-configured-image
- sudo docker stop oxlacontainer

- # run oxla
- sudo docker run --rm -it -p 5432:5432 --net=host --name oxlacontainer oxla-configured-image &
+ sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9

- # sleep, waiting for initialisation (leader election, etc.)
- sleep(10)
+ # run oxla
+ sudo docker run --rm --net=host --name oxlacontainer oxla-configured-image > /dev/null 2>&1 &
+ sleep 10 # waiting for container start and db initialisation (leader election, etc.)

# create table and ingest data
export PGCLIENTENCODING=UTF8
@@ -90,15 +100,24 @@ psql -h localhost -p 5432 -U postgres -d test -t < create.sql
for file in part_*.csv; do
echo "Processing file: $file"
psql -h localhost -p 5432 -U postgres -d test -t -c '\timing' -c "COPY hits FROM 's3://my-new-bucket/$file';"
+ aws s3api delete-object --bucket my-new-bucket --key "$file" --endpoint-url http://localhost:4569
done

- # kill fake S3
- ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | xargs -r kill -9
- sudo rm -rf /mnt/fakes3_root
+ # get ingested data size
+ echo "data size after ingest:"
+ sudo docker exec oxlacontainer /bin/bash -c "du -s oxla/data"

- sudo docker exec -it oxlacontainer /bin/bash -c "du -sh oxla/data"
+ # wait for merges to finish
+ sleep 30
+
+ # kill fake S3 and remove its data
+ ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
+ sudo rm -rf /mnt/fakes3_root

+ # run benchmark
+ echo "running benchmark..."
./run.sh 2>&1 | tee log.txt

+ # format results: group the three timings per query into "[t1,t2,t3]," rows, in seconds
cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' |
awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'