Skip to content

Commit 48cc37b

Browse files
committed
add microbenchmark for hash expressions
1 parent 6ec70ae commit 48cc37b

File tree

1 file changed

+70
-0
lines changed

1 file changed

+70
-0
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.spark.sql.benchmark
21+
22+
/**
 * Describes one hash-expression benchmark case.
 *
 * @param name
 *   label handed to `runExpressionBenchmark` to identify the case
 * @param query
 *   SQL text handed to `runExpressionBenchmark` for the case
 * @param extraCometConfigs
 *   additional configuration key/value pairs forwarded together with the query; empty unless
 *   the case supplies some
 */
case class HashExprConfig(
    name: String,
    query: String,
    extraCometConfigs: Map[String, String] = Map.empty)
26+
27+
/**
 * Comprehensive benchmark for Comet hash expressions. To run this benchmark:
 * {{{
 *   SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometHashExpressionBenchmark
 * }}}
 * Results will be written to "spark/benchmarks/CometHashExpressionBenchmark-**results.txt".
 */
object CometHashExpressionBenchmark extends CometBenchmarkBase {

  // One entry per benchmarked query. Every query reads from the temp table "parquetV1Table"
  // that runCometBenchmark registers below. The "_single" cases hash only c_str; the "_multi"
  // cases hash c_str, c_int and c_long together. sha2 is exercised at each of the bit lengths
  // 224, 256, 384 and 512.
  private val hashExpressions = List(
    HashExprConfig("xxhash64_single", "SELECT xxhash64(c_str) FROM parquetV1Table"),
    HashExprConfig("xxhash64_multi", "SELECT xxhash64(c_str, c_int, c_long) FROM parquetV1Table"),
    HashExprConfig("murmur3_hash_single", "SELECT hash(c_str) FROM parquetV1Table"),
    HashExprConfig("murmur3_hash_multi", "SELECT hash(c_str, c_int, c_long) FROM parquetV1Table"),
    HashExprConfig("sha1", "SELECT sha1(c_str) FROM parquetV1Table"),
    HashExprConfig("sha2_224", "SELECT sha2(c_str, 224) FROM parquetV1Table"),
    HashExprConfig("sha2_256", "SELECT sha2(c_str, 256) FROM parquetV1Table"),
    HashExprConfig("sha2_384", "SELECT sha2(c_str, 384) FROM parquetV1Table"),
    HashExprConfig("sha2_512", "SELECT sha2(c_str, 512) FROM parquetV1Table"))

  /**
   * Prepares one shared input table and then runs every case in `hashExpressions` against it.
   *
   * @param mainArgs
   *   command-line arguments; not read by this implementation
   */
  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
    // NOTE(review): presumably the number of rows generated into `tbl` by
    // runBenchmarkWithTable -- confirm against CometBenchmarkBase.
    val values = 1024 * 1024

    runBenchmarkWithTable("Hash expression benchmarks", values) { v =>
      withTempPath { dir =>
        withTempTable("parquetV1Table") {
          // Derive the three input columns from `value`. Each column is set to NULL on a
          // different residue of value % 100 (0, 1 and 2 respectively), so every column
          // contains NULLs but never on the same row as another column.
          prepareTable(
            dir,
            spark.sql(s"""
              SELECT
                CASE WHEN value % 100 = 0 THEN NULL ELSE CONCAT('string_', CAST(value AS STRING)) END AS c_str,
                CASE WHEN value % 100 = 1 THEN NULL ELSE CAST(value % 1000000 AS INT) END AS c_int,
                CASE WHEN value % 100 = 2 THEN NULL ELSE CAST(value * 1000 AS LONG) END AS c_long
              FROM $tbl
            """))

          // The table is prepared once and reused by every case.
          hashExpressions.foreach { config =>
            runExpressionBenchmark(config.name, v, config.query, config.extraCometConfigs)
          }
        }
      }
    }
  }
}

0 commit comments

Comments
 (0)