
Commit 937619f

chore: Add microbenchmark for casting string to numeric (#2979)
1 parent aefb3b6 · commit 937619f

1 file changed: 95 additions & 0 deletions
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.spark.sql.benchmark

import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.internal.SQLConf

import org.apache.comet.CometConf

case class CastStringToNumericConfig(
    name: String,
    query: String,
    extraCometConfigs: Map[String, String] = Map.empty)

/**
 * Benchmark to measure performance of Comet cast from String to numeric types. To run this
 * benchmark:
 * {{{
 * SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometCastStringToNumericBenchmark
 * }}}
 */
object CometCastStringToNumericBenchmark extends CometBenchmarkBase {

  private val castFunctions = Seq("CAST", "TRY_CAST")
  private val targetTypes =
    Seq(
      "BOOLEAN",
      "BYTE",
      "SHORT",
      "INT",
      "LONG",
      "FLOAT",
      "DOUBLE",
      "DECIMAL(10,2)",
      "DECIMAL(38,19)")

  private val castConfigs = for {
    castFunc <- castFunctions
    targetType <- targetTypes
  } yield CastStringToNumericConfig(
    s"$castFunc String to $targetType",
    s"SELECT $castFunc(c1 AS $targetType) FROM parquetV1Table",
    Map(
      SQLConf.ANSI_ENABLED.key -> "false",
      CometConf.getExprAllowIncompatConfigKey(classOf[Cast]) -> "true"))

  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
    val values = 1024 * 1024 // 1M rows

    // Generate input data once for all benchmarks
    runBenchmarkWithTable("String to numeric casts", values) { v =>
      withTempPath { dir =>
        withTempTable("parquetV1Table") {
          // Generate numeric strings with both integer and decimal values.
          // Also include some special values: nulls (~2%), NaN (~2%), Infinity (~2%)
          prepareTable(
            dir,
            spark.sql(s"""
              SELECT CASE
                WHEN value % 50 = 0 THEN NULL
                WHEN value % 50 = 1 THEN 'NaN'
                WHEN value % 50 = 2 THEN 'Infinity'
                WHEN value % 50 = 3 THEN '-Infinity'
                WHEN value % 50 < 10 THEN CAST(value % 99 AS STRING)
                WHEN value % 50 < 30 THEN CAST(value % 999999 AS STRING)
                ELSE CAST((value - 500000) / 100.0 AS STRING)
              END AS c1
              FROM $tbl
            """))

          castConfigs.foreach { config =>
            runExpressionBenchmark(config.name, v, config.query, config.extraCometConfigs)
          }
        }
      }
    }
  }
}
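For reference, the for-comprehension over castFunctions and targetTypes above expands to 18 benchmark cases (2 cast functions × 9 target types), each driving one SQL query against the generated Parquet table. A minimal standalone sketch of that expansion, using a hypothetical object name (CastQueryExpansion) that is not part of the benchmark itself:

// Sketch only: reproduces the query expansion performed by the benchmark's
// for-comprehension, one query per (cast function, target type) pair.
object CastQueryExpansion extends App {
  val castFunctions = Seq("CAST", "TRY_CAST")
  val targetTypes = Seq(
    "BOOLEAN", "BYTE", "SHORT", "INT", "LONG",
    "FLOAT", "DOUBLE", "DECIMAL(10,2)", "DECIMAL(38,19)")

  // 2 functions x 9 types = 18 queries, matching the 18 benchmark cases above.
  val queries = for {
    castFunc <- castFunctions
    targetType <- targetTypes
  } yield s"SELECT $castFunc(c1 AS $targetType) FROM parquetV1Table"

  queries.foreach(println)
  // Prints, among others:
  //   SELECT CAST(c1 AS BOOLEAN) FROM parquetV1Table
  //   SELECT TRY_CAST(c1 AS DECIMAL(38,19)) FROM parquetV1Table
}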
