Skip to content

Commit 93ab38f

Browse files
committed
implement more microbenchmarks for casts
1 parent 6ec70ae commit 93ab38f

File tree

6 files changed

+662
-0
lines changed

6 files changed

+662
-0
lines changed
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.spark.sql.benchmark
21+
22+
/**
 * Describes one Boolean-related cast benchmark case.
 *
 * @param name
 *   label handed to `runExpressionBenchmark` for this case
 * @param query
 *   SQL text handed to `runExpressionBenchmark` for this case
 * @param extraCometConfigs
 *   extra configuration entries handed to `runExpressionBenchmark`; empty unless specified
 */
final case class CastBooleanConfig(
    name: String,
    query: String,
    extraCometConfigs: Map[String, String] = Map.empty)
26+
27+
// spotless:off
/**
 * Microbenchmark for Comet cast operations where Boolean is the source or the target type.
 * Every case is measured with both `CAST` and `TRY_CAST`. To run this benchmark:
 * `SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometCastBooleanBenchmark`
 * Results will be written to "spark/benchmarks/CometCastBooleanBenchmark-**results.txt".
 */
// spotless:on
object CometCastBooleanBenchmark extends CometBenchmarkBase {

  private val castFunctions = Seq("CAST", "TRY_CAST")

  /**
   * Builds a single case: `castFunc` applied to `column` with SQL target type `sqlType`,
   * labelled "`castFunc` `description`".
   */
  private def booleanCastCase(
      castFunc: String,
      description: String,
      column: String,
      sqlType: String): CastBooleanConfig =
    CastBooleanConfig(
      s"$castFunc $description",
      s"SELECT $castFunc($column AS $sqlType) FROM parquetV1Table")

  // Boolean -> String, one case per cast function
  private val boolToStringConfigs =
    castFunctions.map(fn => booleanCastCase(fn, "Boolean to String", "c_bool", "STRING"))

  // Boolean -> each numeric type; all CAST cases come first, then all TRY_CAST cases
  private val boolToNumericTypes =
    Seq("BYTE", "SHORT", "INT", "LONG", "FLOAT", "DOUBLE", "DECIMAL(10,2)")
  private val boolToNumericConfigs =
    castFunctions.flatMap { fn =>
      boolToNumericTypes.map { target =>
        booleanCastCase(fn, s"Boolean to $target", "c_bool", target)
      }
    }

  // Each numeric type (paired with the column that holds it) -> Boolean
  private val numericTypes = Seq(
    "BYTE" -> "c_byte",
    "SHORT" -> "c_short",
    "INT" -> "c_int",
    "LONG" -> "c_long",
    "FLOAT" -> "c_float",
    "DOUBLE" -> "c_double",
    "DECIMAL(10,2)" -> "c_decimal")

  private val numericToBoolConfigs =
    castFunctions.flatMap { fn =>
      numericTypes.map { case (source, column) =>
        booleanCastCase(fn, s"$source to Boolean", column, "BOOLEAN")
      }
    }

  /**
   * Runs the two benchmark groups in order: Boolean as the cast source, then Boolean as the cast
   * target. Each group hands its own generated data to `prepareTable` and then runs its cases,
   * whose queries read from `parquetV1Table`.
   */
  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
    val values = getBenchmarkRows(1024 * 1024 * 5) // 5M rows default

    // Group 1: a single Boolean column; NULL when value % 100 = 0, otherwise (value % 2 = 0).
    runBenchmarkWithTable("Boolean to other types casts", values) { rows =>
      withTempPath { dir =>
        withTempTable("parquetV1Table") {
          val booleanData = spark.sql(s"""
              SELECT CASE
                WHEN value % 100 = 0 THEN NULL
                ELSE (value % 2 = 0)
              END AS c_bool
              FROM $tbl
            """)
          prepareTable(dir, booleanData)

          val booleanSourceCases = boolToStringConfigs ++ boolToNumericConfigs
          for (benchCase <- booleanSourceCases) {
            runExpressionBenchmark(
              benchCase.name,
              rows,
              benchCase.query,
              benchCase.extraCometConfigs)
          }
        }
      }
    }

    // Group 2: one column per numeric type holding -1, 0 or 1, each with its own NULL slot
    // (value % 100 = 0..6).
    runBenchmarkWithTable("Numeric to Boolean casts", values) { rows =>
      withTempPath { dir =>
        withTempTable("parquetV1Table") {
          val numericData = spark.sql(s"""
              SELECT
                CASE WHEN value % 100 = 0 THEN NULL ELSE CAST((value % 3) - 1 AS BYTE) END AS c_byte,
                CASE WHEN value % 100 = 1 THEN NULL ELSE CAST((value % 3) - 1 AS SHORT) END AS c_short,
                CASE WHEN value % 100 = 2 THEN NULL ELSE CAST((value % 3) - 1 AS INT) END AS c_int,
                CASE WHEN value % 100 = 3 THEN NULL ELSE CAST((value % 3) - 1 AS LONG) END AS c_long,
                CASE WHEN value % 100 = 4 THEN NULL ELSE CAST((value % 3) - 1 AS FLOAT) END AS c_float,
                CASE WHEN value % 100 = 5 THEN NULL ELSE CAST((value % 3) - 1 AS DOUBLE) END AS c_double,
                CASE WHEN value % 100 = 6 THEN NULL ELSE CAST((value % 3) - 1 AS DECIMAL(10,2)) END AS c_decimal
              FROM $tbl
            """)
          prepareTable(dir, numericData)

          for (benchCase <- numericToBoolConfigs) {
            runExpressionBenchmark(
              benchCase.name,
              rows,
              benchCase.query,
              benchCase.extraCometConfigs)
          }
        }
      }
    }
  }
}
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.spark.sql.benchmark
21+
22+
/**
 * Describes one numeric-to-numeric cast benchmark case.
 *
 * @param name
 *   label handed to `runExpressionBenchmark` for this case
 * @param query
 *   SQL text handed to `runExpressionBenchmark` for this case
 * @param extraCometConfigs
 *   extra configuration entries handed to `runExpressionBenchmark`; empty unless specified
 */
final case class CastNumericToNumericConfig(
    name: String,
    query: String,
    extraCometConfigs: Map[String, String] = Map.empty)
26+
27+
// spotless:off
/**
 * Microbenchmark for Comet casts from one numeric type to another. Every source/target pair is
 * measured with both `CAST` and `TRY_CAST`. To run this benchmark:
 * `SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometCastNumericToNumericBenchmark`
 * Results will be written to "spark/benchmarks/CometCastNumericToNumericBenchmark-**results.txt".
 */
// spotless:on
object CometCastNumericToNumericBenchmark extends CometBenchmarkBase {

  private val castFunctions = Seq("CAST", "TRY_CAST")

  // Every pair below is (source type label, source column, target SQL type).

  // Integer widening conversions
  private val integerWideningPairs = Seq(
    ("BYTE", "c_byte", "SHORT"),
    ("BYTE", "c_byte", "INT"),
    ("BYTE", "c_byte", "LONG"),
    ("SHORT", "c_short", "INT"),
    ("SHORT", "c_short", "LONG"),
    ("INT", "c_int", "LONG"))

  // Integer narrowing conversions
  private val integerNarrowingPairs = Seq(
    ("LONG", "c_long", "INT"),
    ("LONG", "c_long", "SHORT"),
    ("LONG", "c_long", "BYTE"),
    ("INT", "c_int", "SHORT"),
    ("INT", "c_int", "BYTE"),
    ("SHORT", "c_short", "BYTE"))

  // Conversions between the two floating point types
  private val floatPairs = Seq(
    ("FLOAT", "c_float", "DOUBLE"),
    ("DOUBLE", "c_double", "FLOAT"))

  // Integer to floating point conversions
  private val intToFloatPairs = Seq(
    ("BYTE", "c_byte", "FLOAT"),
    ("SHORT", "c_short", "FLOAT"),
    ("INT", "c_int", "FLOAT"),
    ("LONG", "c_long", "FLOAT"),
    ("INT", "c_int", "DOUBLE"),
    ("LONG", "c_long", "DOUBLE"))

  // Floating point to integer conversions
  private val floatToIntPairs = Seq(
    ("FLOAT", "c_float", "INT"),
    ("FLOAT", "c_float", "LONG"),
    ("DOUBLE", "c_double", "INT"),
    ("DOUBLE", "c_double", "LONG"))

  // Conversions into and out of decimal
  private val decimalPairs = Seq(
    ("INT", "c_int", "DECIMAL(10,2)"),
    ("LONG", "c_long", "DECIMAL(20,4)"),
    ("DOUBLE", "c_double", "DECIMAL(15,5)"),
    ("DECIMAL(10,2)", "c_decimal", "INT"),
    ("DECIMAL(10,2)", "c_decimal", "LONG"),
    ("DECIMAL(10,2)", "c_decimal", "DOUBLE"))

  /**
   * Expands `pairs` into benchmark cases: all pairs with the first cast function, then all pairs
   * with the next one.
   *
   * @param pairs
   *   (source type label, source column, target SQL type) triples
   * @return
   *   one case per (cast function, pair) combination
   */
  private def generateConfigs(
      pairs: Seq[(String, String, String)]): Seq[CastNumericToNumericConfig] =
    castFunctions.flatMap { fn =>
      pairs.map { case (sourceType, column, targetType) =>
        CastNumericToNumericConfig(
          s"$fn $sourceType to $targetType",
          s"SELECT $fn($column AS $targetType) FROM parquetV1Table")
      }
    }

  /**
   * Hands one generated data set covering every numeric column to `prepareTable`, then runs
   * every category of cast against `parquetV1Table`.
   */
  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
    val values = getBenchmarkRows(1024 * 1024 * 5) // 5M rows default

    runBenchmarkWithTable("Numeric to Numeric casts", values) { rows =>
      withTempPath { dir =>
        withTempTable("parquetV1Table") {
          // Each column has its own NULL slot (value % 100); the float and double columns
          // additionally contain NaN, Infinity and -Infinity.
          val numericData = spark.sql(s"""
              SELECT
                CASE WHEN value % 100 = 0 THEN NULL ELSE CAST((value % 128) - 64 AS BYTE) END AS c_byte,
                CASE WHEN value % 100 = 1 THEN NULL ELSE CAST((value % 32768) - 16384 AS SHORT) END AS c_short,
                CASE WHEN value % 100 = 2 THEN NULL ELSE CAST(value - 2500000 AS INT) END AS c_int,
                CASE WHEN value % 100 = 3 THEN NULL ELSE CAST(value * 1000 AS LONG) END AS c_long,
                CASE
                  WHEN value % 100 = 4 THEN NULL
                  WHEN value % 100 = 5 THEN CAST('NaN' AS FLOAT)
                  WHEN value % 100 = 6 THEN CAST('Infinity' AS FLOAT)
                  WHEN value % 100 = 7 THEN CAST('-Infinity' AS FLOAT)
                  ELSE CAST((value - 2500000) / 100.0 AS FLOAT)
                END AS c_float,
                CASE
                  WHEN value % 100 = 8 THEN NULL
                  WHEN value % 100 = 9 THEN CAST('NaN' AS DOUBLE)
                  WHEN value % 100 = 10 THEN CAST('Infinity' AS DOUBLE)
                  WHEN value % 100 = 11 THEN CAST('-Infinity' AS DOUBLE)
                  ELSE CAST((value - 2500000) / 100.0 AS DOUBLE)
                END AS c_double,
                CASE WHEN value % 100 = 12 THEN NULL ELSE CAST((value - 2500000) / 100.0 AS DECIMAL(10,2)) END AS c_decimal
              FROM $tbl
            """)
          prepareTable(dir, numericData)

          // Categories run in this fixed order; within a category CAST precedes TRY_CAST.
          val castCases = Seq(
            integerWideningPairs,
            integerNarrowingPairs,
            floatPairs,
            intToFloatPairs,
            floatToIntPairs,
            decimalPairs).flatMap(generateConfigs)

          for (benchCase <- castCases) {
            runExpressionBenchmark(
              benchCase.name,
              rows,
              benchCase.query,
              benchCase.extraCometConfigs)
          }
        }
      }
    }
  }
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.spark.sql.benchmark
21+
22+
/**
 * Describes one numeric-to-String cast benchmark case.
 *
 * @param name
 *   label handed to `runExpressionBenchmark` for this case
 * @param query
 *   SQL text handed to `runExpressionBenchmark` for this case
 * @param extraCometConfigs
 *   extra configuration entries handed to `runExpressionBenchmark`; empty unless specified
 */
final case class CastNumericToStringConfig(
    name: String,
    query: String,
    extraCometConfigs: Map[String, String] = Map.empty)
26+
27+
// spotless:off
/**
 * Microbenchmark for Comet casts to String from Boolean and from each numeric type, measured
 * with both `CAST` and `TRY_CAST`. To run this benchmark:
 * `SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometCastNumericToStringBenchmark`
 * Results will be written to "spark/benchmarks/CometCastNumericToStringBenchmark-**results.txt".
 */
// spotless:on
object CometCastNumericToStringBenchmark extends CometBenchmarkBase {

  private val castFunctions = Seq("CAST", "TRY_CAST")

  // Source type label paired with the column that holds values of that type.
  private val sourceTypes = Seq(
    "BOOLEAN" -> "c_bool",
    "BYTE" -> "c_byte",
    "SHORT" -> "c_short",
    "INT" -> "c_int",
    "LONG" -> "c_long",
    "FLOAT" -> "c_float",
    "DOUBLE" -> "c_double",
    "DECIMAL(10,2)" -> "c_decimal")

  // All source types with CAST first, then all source types with TRY_CAST.
  private val castConfigs =
    castFunctions.flatMap { fn =>
      sourceTypes.map { case (sourceType, column) =>
        CastNumericToStringConfig(
          s"$fn $sourceType to String",
          s"SELECT $fn($column AS STRING) FROM parquetV1Table")
      }
    }

  /**
   * Hands one generated data set covering every source column to `prepareTable`, then runs each
   * cast-to-String case against `parquetV1Table`.
   */
  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
    val values = getBenchmarkRows(1024 * 1024 * 5) // 5M rows default

    runBenchmarkWithTable("Numeric to String casts", values) { rows =>
      withTempPath { dir =>
        withTempTable("parquetV1Table") {
          // Each column has its own NULL slot (value % 100); the float and double columns
          // additionally contain NaN, Infinity and -Infinity.
          val sourceData = spark.sql(s"""
              SELECT
                CASE WHEN value % 100 = 0 THEN NULL ELSE (value % 2 = 0) END AS c_bool,
                CASE WHEN value % 100 = 1 THEN NULL ELSE CAST((value % 128) - 64 AS BYTE) END AS c_byte,
                CASE WHEN value % 100 = 2 THEN NULL ELSE CAST((value % 32768) - 16384 AS SHORT) END AS c_short,
                CASE WHEN value % 100 = 3 THEN NULL ELSE CAST(value - 2500000 AS INT) END AS c_int,
                CASE WHEN value % 100 = 4 THEN NULL ELSE CAST(value * 1000000 AS LONG) END AS c_long,
                CASE
                  WHEN value % 100 = 5 THEN NULL
                  WHEN value % 100 = 6 THEN CAST('NaN' AS FLOAT)
                  WHEN value % 100 = 7 THEN CAST('Infinity' AS FLOAT)
                  WHEN value % 100 = 8 THEN CAST('-Infinity' AS FLOAT)
                  ELSE CAST((value - 2500000) / 1000.0 AS FLOAT)
                END AS c_float,
                CASE
                  WHEN value % 100 = 9 THEN NULL
                  WHEN value % 100 = 10 THEN CAST('NaN' AS DOUBLE)
                  WHEN value % 100 = 11 THEN CAST('Infinity' AS DOUBLE)
                  WHEN value % 100 = 12 THEN CAST('-Infinity' AS DOUBLE)
                  ELSE CAST((value - 2500000) / 100.0 AS DOUBLE)
                END AS c_double,
                CASE WHEN value % 100 = 13 THEN NULL ELSE CAST((value - 2500000) / 100.0 AS DECIMAL(10,2)) END AS c_decimal
              FROM $tbl
            """)
          prepareTable(dir, sourceData)

          for (benchCase <- castConfigs) {
            runExpressionBenchmark(
              benchCase.name,
              rows,
              benchCase.query,
              benchCase.extraCometConfigs)
          }
        }
      }
    }
  }
}

0 commit comments

Comments
 (0)