Skip to content

Commit 37cb5c9

Browse files
authored
chore: Improve microbenchmark for string expressions (#2964)
1 parent 2738f06 commit 37cb5c9

File tree

1 file changed

+24
-20
lines changed

1 file changed

+24
-20
lines changed

spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala

Lines changed: 24 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -44,22 +44,6 @@ case class StringExprConfig(
4444
// spotless:on
4545
object CometStringExpressionBenchmark extends CometBenchmarkBase {
4646

47-
/**
48-
* Generic method to run a string expression benchmark with the given configuration.
49-
*/
50-
def runStringExprBenchmark(config: StringExprConfig, values: Int): Unit = {
51-
withTempPath { dir =>
52-
withTempTable("parquetV1Table") {
53-
prepareTable(dir, spark.sql(s"SELECT REPEAT(CAST(value AS STRING), 100) AS c1 FROM $tbl"))
54-
55-
val extraConfigs =
56-
Map(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true") ++ config.extraCometConfigs
57-
58-
runExpressionBenchmark(config.name, values, config.query, extraConfigs)
59-
}
60-
}
61-
}
62-
6347
// Configuration for all string expression benchmarks
6448
private val stringExpressions = List(
6549
StringExprConfig("Substring", "select substring(c1, 1, 100) from parquetV1Table"),
@@ -71,7 +55,16 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase {
7155
StringExprConfig("chr", "select chr(c1) from parquetV1Table"),
7256
StringExprConfig("initCap", "select initCap(c1) from parquetV1Table"),
7357
StringExprConfig("trim", "select trim(c1) from parquetV1Table"),
58+
StringExprConfig("btrim", "select btrim(c1) from parquetV1Table"),
59+
StringExprConfig("ltrim", "select ltrim(c1) from parquetV1Table"),
60+
StringExprConfig("rtrim", "select rtrim(c1) from parquetV1Table"),
61+
StringExprConfig("lpad", "select lpad(c1, 120, 'x') from parquetV1Table"),
62+
StringExprConfig("rpad", "select rpad(c1, 120, 'x') from parquetV1Table"),
63+
StringExprConfig("concat", "select concat(c1, c1) from parquetV1Table"),
7464
StringExprConfig("concatws", "select concat_ws(' ', c1, c1) from parquetV1Table"),
65+
StringExprConfig("contains", "select contains(c1, '123') from parquetV1Table"),
66+
StringExprConfig("startsWith", "select startswith(c1, '123') from parquetV1Table"),
67+
StringExprConfig("endsWith", "select endswith(c1, '123') from parquetV1Table"),
7568
StringExprConfig("length", "select length(c1) from parquetV1Table"),
7669
StringExprConfig("repeat", "select repeat(c1, 3) from parquetV1Table"),
7770
StringExprConfig("reverse", "select reverse(c1) from parquetV1Table"),
@@ -81,11 +74,22 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase {
8174
StringExprConfig("translate", "select translate(c1, '123456', 'aBcDeF') from parquetV1Table"))
8275

8376
override def runCometBenchmark(mainArgs: Array[String]): Unit = {
84-
val values = 1024 * 1024;
77+
runBenchmarkWithTable("String expressions", 1024) { v =>
78+
withTempPath { dir =>
79+
withTempTable("parquetV1Table") {
80+
prepareTable(
81+
dir,
82+
spark.sql(s"SELECT REPEAT(CAST(value AS STRING), 10) AS c1 FROM $tbl"))
83+
84+
val extraConfigs = Map(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true")
8585

86-
stringExpressions.foreach { config =>
87-
runBenchmarkWithTable(config.name, values) { v =>
88-
runStringExprBenchmark(config, v)
86+
stringExpressions.foreach { config =>
87+
val allConfigs = extraConfigs ++ config.extraCometConfigs
88+
runBenchmark(config.name) {
89+
runExpressionBenchmark(config.name, v, config.query, allConfigs)
90+
}
91+
}
92+
}
8993
}
9094
}
9195
}

0 commit comments

Comments
 (0)