Commit c37a35b

Oxla clean-up.
1 parent aa0e9ca commit c37a35b

File tree

1 file changed: +52 -33 lines changed


oxla/benchmark.sh

Lines changed: 52 additions & 33 deletions
@@ -1,4 +1,8 @@
-#!/bin/bash
+#!/bin/bash -e
+
+# cleanup
+sudo ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
+sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
 
 # docker
 sudo rm /usr/share/keyrings/docker-archive-keyring.gpg
@@ -8,7 +12,7 @@ sudo apt update
 sudo apt install -y docker-ce
 
 # base
-sudo apt-get install -y postgresql-client curl wget apt-transport-https ca-certificates software-properties-common gnupg2
+sudo apt-get install -y postgresql-client curl wget apt-transport-https ca-certificates software-properties-common gnupg2 parallel
 sudo DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential
 
 # ruby and fake S3
@@ -33,54 +37,60 @@ echo -e "[default]\nregion = none" > ~/.aws/config
 echo -e "[default]\naws_access_key_id = none\naws_secret_access_key = none" > ~/.aws/credentials
 
 # run fake S3
-ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | xargs -r kill -9
-
+sudo ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
 sudo rm -rf /mnt/fakes3_root
 sudo mkdir -p /mnt/fakes3_root
 sudo chmod a+rw /mnt/fakes3_root -R
 fakes3 -r /mnt/fakes3_root -H 0.0.0.0 -p 4569 --license license.pdf > /dev/null 2>&1 &
+sleep 10 # waiting for container start
 
-# # download dataset
-# wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
-# gzip -d hits.tsv.gz
-# chmod 777 ~ hits.tsv
+# download dataset
+wget --no-verbose --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
+gzip -d hits.tsv.gz
+chmod 777 ~ hits.tsv
 
-# # convert dataset to csv
-# rm -f hits_part*.csv
-# curl https://clickhouse.com/ | sh
-# ./clickhouse local --query "SELECT * FROM 'hits.tsv' INTO OUTFILE 'hits.csv'"
-# rm hits.tsv
+# convert dataset to csv
+rm -f part_*.csv
+curl https://clickhouse.com/ | sh
+./clickhouse local --query "SELECT * FROM 'hits.tsv' INTO OUTFILE 'hits.csv'"
+rm hits.tsv
 
-# split -l 5000000 hits.csv part_
-# for file in part_*; do mv "$file" "${file}.csv"; done
+# prepare digestable parts (5m rows each) of hits.csv
+split -l 5000000 hits.csv part_
+for file in part_*; do mv "$file" "${file}.csv"; done
 
-# upload dataset to fake S3 bucket
+# upload dataset (prepared parts) to fake S3 bucket
 aws s3 mb s3://my-new-bucket --endpoint-url http://localhost:4569
 
 for file in part_*.csv; do
     echo "Processing file: $file"
 
     # copy the file to the S3 bucket
-    aws s3 cp "./$file" s3://my-new-bucket --endpoint-url http://localhost:4569
+    aws s3 cp "./$file" s3://my-new-bucket --endpoint-url http://localhost:4569 > /dev/null 2>&1
 
-    # clean-up tmp pars left after upload
-    for key in $(aws s3api list-objects --bucket my-new-bucket --query "Contents[?contains(Key, '_${file}_')].Key" --output text --endpoint-url http://localhost:4569); do
-        aws s3api delete-object --bucket my-new-bucket --key "$key" --endpoint-url http://localhost:4569
-    done
+    # clean-up tmp parts left after upload
+    TMPPARTS=$(aws s3api list-objects --bucket my-new-bucket --query "Contents[?contains(Key, '_${file}_')].Key" --output text --endpoint-url http://localhost:4569)
+    echo $TMPPARTS | tr ' ' '\n' | grep . | parallel -j16 aws s3api delete-object --bucket my-new-bucket --key {} --endpoint-url http://localhost:4569
 done
 
 # get and configure Oxla image
-sudo docker run --rm -it -p 5432:5432 --name oxlacontainer public.ecr.aws/oxla/release:latest &
-sudo docker exec -it oxlacontainer /bin/bash -c "sed -i 's#endpoint: \"\"#endpoint: \"http://localhost:4569\"#g' oxla/default_config.yml"
-sudo docker exec -it oxlacontainer /bin/bash -c "sed -i 's#endpoint:.*#endpoint: '\''http://localhost:4569'\''#g' oxla/startup_config/config.yml"
+echo "Install and run Oxla."
+
+sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
+
+sudo docker run --rm -p 5432:5432 --name oxlacontainer public.ecr.aws/oxla/release:latest > /dev/null 2>&1 &
+sleep 10 # waiting for container start and db initialisation (leader election, etc.)
+
+sudo docker exec oxlacontainer /bin/bash -c "sed -i 's#endpoint: \"\"#endpoint: \"http://localhost:4569\"#g' oxla/default_config.yml"
+sudo docker exec oxlacontainer /bin/bash -c "sed -i 's#endpoint:.*#endpoint: '\''http://localhost:4569'\''#g' oxla/startup_config/config.yml"
+sudo docker rmi oxla-configured-image:latest > /dev/null 2>&1 || echo "" > /dev/null
 sudo docker commit oxlacontainer oxla-configured-image
-sudo docker stop oxlacontainer
 
-# run oxla
-sudo docker run --rm -it -p 5432:5432 --net=host --name oxlacontainer oxla-configured-image &
+sudo ps aux | grep oxla | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
 
-# sleep, waiting for initialisation (leader election, etc.)
-sleep(10)
+# run oxla
+sudo docker run --rm --net=host --name oxlacontainer oxla-configured-image > /dev/null 2>&1 &
+sleep 10 # waiting for container start and db initialisation (leader election, etc.)
 
 # create table and ingest data
 export PGCLIENTENCODING=UTF8
@@ -90,15 +100,24 @@ psql -h localhost -p 5432 -U postgres -d test -t < create.sql
 for file in part_*.csv; do
     echo "Processing file: $file"
     psql -h localhost -p 5432 -U postgres -d test -t -c '\timing' -c "COPY hits FROM 's3://my-new-bucket/$file';"
+    aws s3api delete-object --bucket my-new-bucket --key "$file" --endpoint-url http://localhost:4569
 done
 
-# kill fake S3
-ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | xargs -r kill -9
-sudo rm -rf /mnt/fakes3_root
+# get ingested data size
+echo "data size after ingest:"
+sudo docker exec oxlacontainer /bin/bash -c "du -s oxla/data"
 
-sudo docker exec -it oxlacontainer /bin/bash -c "du -sh oxla/data"
+# wait for merges to finish
+sleep 30
+
+# kill fake S3 and remove its data
+ps aux | grep fakes3 | grep -v grep | awk '{print $2}' | sudo xargs -r kill -9
+sudo rm -rf /mnt/fakes3_root
 
+# run benchmark
+echo "running benchmark..."
 ./run.sh 2>&1 | tee log.txt
 
+# format results
 cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' |
 awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
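
Note: the closing grep/sed/awk pipeline turns the psql '\timing' output captured in log.txt into the benchmark's result format, emitting one bracketed triple of per-run times in seconds per query (three runs each). A minimal sketch of that behaviour, with hypothetical timing values seeded into log.txt for illustration:

# seed log.txt with three illustrative timings (one query, three runs)
printf 'Time: 100.5 ms\nTime: 90.25 ms\nTime: 85.0 ms\n' > log.txt
# extract the millisecond values, convert to seconds, group in threes
cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' |
awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
# prints: [0.1005,0.09025,0.085],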
