Skip to content

Commit c324099

Browse files
authored
Merge pull request ClickHouse#340 from acking-you/datafusion-issue-15465
[datafusion] Remove unnecessary type conversions
2 parents 52d4874 + 2e0bd8a commit c324099

File tree

4 files changed

+99
-109
lines changed

4 files changed

+99
-109
lines changed

datafusion/benchmark.sh

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,29 @@
11
#!/bin/bash
22

3-
# Install Rust
3+
echo "Install Rust"
44
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > rust-init.sh
55
bash rust-init.sh -y
66
source ~/.cargo/env
77

8+
echo "Install Dependencies"
9+
sudo apt-get update
10+
sudo apt-get install --yes gcc
811

9-
# Install Dependencies
10-
sudo yum update -y
11-
sudo yum install gcc -y
12-
13-
14-
# Install DataFusion main branch
12+
echo "Install DataFusion main branch"
1513
git clone https://github.com/apache/arrow-datafusion.git
1614
cd arrow-datafusion/datafusion-cli
1715
git checkout 45.0.0
1816
CARGO_PROFILE_RELEASE_LTO=true RUSTFLAGS="-C codegen-units=1" cargo build --release
1917
export PATH="`pwd`/target/release:$PATH"
2018
cd ../..
2119

22-
23-
# Download benchmark target data, single file
20+
echo "Download benchmark target data, single file"
2421
wget --continue https://datasets.clickhouse.com/hits_compatible/hits.parquet
2522

26-
# Download benchmark target data, partitioned
23+
echo "Download benchmark target data, partitioned"
2724
mkdir -p partitioned
2825
seq 0 99 | xargs -P100 -I{} bash -c 'wget --directory-prefix partitioned --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet'
2926

30-
# Run benchmarks for single parquet and partitioned
27+
echo "Run benchmarks for single parquet and partitioned"
3128
./run.sh single
3229
./run.sh partitioned
33-

datafusion/queries.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449;
2121
SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%';
2222
SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
2323
SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
24-
SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10;
25-
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10;
24+
SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10;
25+
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10;
2626
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10;
27-
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime"), "SearchPhrase" LIMIT 10;
27+
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10;
2828
SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
2929
SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\\.)?([^/]+)/.*$', '\\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
3030
SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits;
Lines changed: 44 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,55 @@
11
{
22
"system": "DataFusion (Parquet, partitioned)",
3-
"date": "2024-02-08",
3+
"date": "2024-03-29",
44
"machine": "c6a.4xlarge, 500gb gp2",
55
"cluster_size": 1,
66
"comment": "v45.0.0 (26058ac)",
7-
87
"tags": ["Rust", "column-oriented", "embedded", "stateless"],
9-
108
"load_time": 0,
119
"data_size": 14779976446,
12-
1310
"result": [
14-
[0.059, 0.020, 0.021],
15-
[0.103, 0.034, 0.032],
16-
[0.188, 0.081, 0.077],
17-
[0.387, 0.089, 0.081],
18-
[0.976, 0.790, 0.792],
19-
[0.982, 0.793, 0.801],
20-
[0.093, 0.031, 0.031],
21-
[0.118, 0.037, 0.038],
22-
[0.997, 0.869, 0.861],
23-
[1.313, 0.987, 0.985],
24-
[0.530, 0.241, 0.250],
25-
[0.607, 0.273, 0.273],
26-
[1.047, 0.849, 0.869],
27-
[2.534, 1.343, 1.203],
28-
[1.095, 0.816, 0.792],
29-
[1.038, 0.952, 0.942],
30-
[2.586, 1.690, 1.710],
31-
[2.500, 1.585, 1.585],
32-
[5.162, 3.475, 3.434],
33-
[0.288, 0.074, 0.073],
34-
[9.896, 1.061, 1.038],
35-
[11.254, 1.246, 1.283],
36-
[21.845, 2.537, 2.541],
37-
[55.438, 9.532, 9.583],
38-
[2.700, 0.436, 0.451],
39-
[0.811, 0.367, 0.355],
40-
[2.702, 0.519, 0.501],
41-
[9.636, 1.464, 1.437],
42-
[9.892, 9.399, 9.500],
43-
[0.521, 0.435, 0.436],
44-
[2.390, 0.751, 0.751],
45-
[5.944, 0.891, 0.890],
46-
[4.703, 3.474, 3.364],
47-
[10.206, 3.615, 3.636],
48-
[10.171, 3.663, 3.631],
49-
[1.289, 1.150, 1.158],
50-
[0.394, 0.188, 0.185],
51-
[0.215, 0.081, 0.080],
52-
[0.289, 0.109, 0.115],
53-
[0.633, 0.346, 0.344],
54-
[0.170, 0.044, 0.045],
55-
[0.159, 0.038, 0.042],
56-
[0.158, 0.052, 0.051]
11+
[0.062, 0.019, 0.021],
12+
[0.117, 0.036, 0.035],
13+
[0.206, 0.084, 0.081],
14+
[0.385, 0.087, 0.086],
15+
[1.077, 0.873, 0.843],
16+
[1.014, 0.860, 0.858],
17+
[0.102, 0.036, 0.033],
18+
[0.125, 0.037, 0.038],
19+
[1.116, 0.943, 0.962],
20+
[1.343, 1.068, 1.041],
21+
[0.509, 0.258, 0.253],
22+
[0.639, 0.287, 0.302],
23+
[1.081, 0.891, 0.901],
24+
[2.708, 1.370, 1.418],
25+
[1.104, 0.854, 0.864],
26+
[1.108, 1.021, 1.016],
27+
[2.679, 1.820, 1.858],
28+
[2.511, 1.684, 1.696],
29+
[5.435, 3.718, 3.841],
30+
[0.260, 0.076, 0.081],
31+
[9.906, 1.077, 1.101],
32+
[11.275, 1.294, 1.288],
33+
[21.902, 2.510, 2.573],
34+
[55.539, 9.451, 9.515],
35+
[2.685, 0.453, 0.445],
36+
[0.798, 0.358, 0.358],
37+
[2.709, 0.514, 0.504],
38+
[9.663, 1.479, 1.502],
39+
[10.040, 9.552, 9.835],
40+
[0.524, 0.423, 0.413],
41+
[2.376, 0.796, 0.805],
42+
[5.963, 0.945, 0.911],
43+
[4.713, 3.666, 3.668],
44+
[10.233, 3.811, 3.796],
45+
[10.265, 3.756, 3.773],
46+
[1.378, 1.277, 1.249],
47+
[0.406, 0.203, 0.203],
48+
[0.209, 0.098, 0.082],
49+
[0.309, 0.117, 0.121],
50+
[0.642, 0.384, 0.369],
51+
[0.181, 0.052, 0.051],
52+
[0.167, 0.046, 0.045],
53+
[0.178, 0.055, 0.054]
5754
]
5855
}

datafusion/results/single.json

Lines changed: 44 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,55 @@
11
{
22
"system": "DataFusion (Parquet, single)",
3-
"date": "2024-02-08",
3+
"date": "2024-03-29",
44
"machine": "c6a.4xlarge, 500gb gp2",
55
"cluster_size": 1,
66
"comment": "v45.0.0 (26058ac)",
7-
87
"tags": ["Rust", "column-oriented", "embedded", "stateless"],
9-
108
"load_time": 0,
119
"data_size": 14779976446,
12-
1310
"result": [
14-
[0.091, 0.059, 0.053],
15-
[0.145, 0.074, 0.067],
16-
[0.204, 0.115, 0.109],
17-
[0.353, 0.112, 0.113],
18-
[0.951, 0.851, 0.838],
19-
[1.033, 0.899, 0.901],
20-
[0.120, 0.066, 0.068],
21-
[0.141, 0.083, 0.077],
22-
[1.020, 0.901, 0.901],
23-
[1.288, 1.018, 1.046],
24-
[0.461, 0.266, 0.277],
25-
[0.545, 0.301, 0.308],
26-
[1.097, 0.931, 0.938],
27-
[2.596, 1.315, 1.314],
28-
[1.048, 0.891, 0.877],
29-
[1.090, 0.973, 0.974],
30-
[2.591, 1.797, 1.818],
31-
[2.503, 1.661, 1.668],
32-
[5.074, 3.473, 3.469],
33-
[0.270, 0.107, 0.110],
34-
[9.744, 1.121, 1.111],
35-
[11.237, 1.445, 1.423],
36-
[22.074, 3.566, 3.478],
37-
[55.972, 9.819, 9.872],
38-
[2.579, 0.567, 0.560],
39-
[0.814, 0.492, 0.493],
40-
[2.576, 0.659, 0.632],
41-
[9.598, 1.572, 1.552],
42-
[10.668, 10.012, 10.155],
43-
[0.562, 0.473, 0.465],
44-
[2.281, 0.860, 0.873],
45-
[5.695, 0.991, 0.946],
46-
[4.506, 3.428, 3.497],
47-
[10.139, 3.779, 3.859],
48-
[10.091, 3.804, 3.776],
49-
[1.315, 1.191, 1.235],
50-
[0.432, 0.234, 0.238],
51-
[0.275, 0.156, 0.156],
52-
[0.342, 0.155, 0.157],
53-
[0.667, 0.423, 0.405],
54-
[0.202, 0.080, 0.078],
55-
[0.186, 0.075, 0.076],
56-
[0.187, 0.099, 0.084]
11+
[0.106, 0.058, 0.070],
12+
[0.139, 0.077, 0.085],
13+
[0.217, 0.117, 0.127],
14+
[0.340, 0.121, 0.123],
15+
[1.029, 0.906, 0.900],
16+
[1.089, 0.950, 0.957],
17+
[0.123, 0.068, 0.077],
18+
[0.148, 0.085, 0.083],
19+
[1.134, 1.005, 0.977],
20+
[1.314, 1.095, 1.096],
21+
[0.484, 0.293, 0.290],
22+
[0.574, 0.313, 0.310],
23+
[1.141, 0.991, 0.971],
24+
[2.609, 1.554, 1.532],
25+
[1.086, 0.922, 0.941],
26+
[1.197, 1.056, 1.081],
27+
[2.676, 1.935, 1.930],
28+
[2.552, 1.805, 1.800],
29+
[5.212, 3.746, 3.798],
30+
[0.243, 0.117, 0.116],
31+
[9.731, 1.176, 1.139],
32+
[11.252, 1.439, 1.453],
33+
[22.124, 3.480, 3.546],
34+
[55.992, 9.964, 9.912],
35+
[2.588, 0.587, 0.563],
36+
[0.808, 0.507, 0.511],
37+
[2.584, 0.646, 0.640],
38+
[9.573, 1.607, 1.621],
39+
[10.865, 10.256, 10.331],
40+
[0.559, 0.454, 0.462],
41+
[2.281, 0.920, 0.936],
42+
[5.692, 1.004, 1.044],
43+
[4.539, 3.731, 3.757],
44+
[10.240, 3.861, 3.946],
45+
[10.201, 3.906, 3.870],
46+
[1.412, 1.288, 1.281],
47+
[0.421, 0.255, 0.243],
48+
[0.292, 0.154, 0.156],
49+
[0.360, 0.169, 0.189],
50+
[0.712, 0.413, 0.413],
51+
[0.211, 0.082, 0.087],
52+
[0.198, 0.091, 0.078],
53+
[0.199, 0.094, 0.089]
5754
]
5855
}

0 commit comments

Comments
 (0)