Skip to content

Commit b471c32

Browse files
committed
improve string benchmarks
1 parent 2bf2835 commit b471c32

File tree

1 file changed

+27
-14
lines changed

1 file changed

+27
-14
lines changed

spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -35,13 +35,13 @@ case class StringExprConfig(
3535
query: String,
3636
extraCometConfigs: Map[String, String] = Map.empty)
3737

38-
// spotless:off
3938
/**
4039
* Benchmark to measure performance of Comet string expressions. To run this benchmark:
41-
* `SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometStringExpressionBenchmark`
40+
* {{{
41+
* SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometStringExpressionBenchmark
42+
* }}}
4243
* Results will be written to "spark/benchmarks/CometStringExpressionBenchmark-**results.txt".
4344
*/
44-
// spotless:on
4545
object CometStringExpressionBenchmark extends CometBenchmarkBase {
4646

4747
/**
@@ -50,7 +50,7 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase {
5050
def runStringExprBenchmark(config: StringExprConfig, values: Int): Unit = {
5151
withTempPath { dir =>
5252
withTempTable("parquetV1Table") {
53-
prepareTable(dir, spark.sql(s"SELECT REPEAT(CAST(value AS STRING), 100) AS c1 FROM $tbl"))
53+
prepareTable(dir, spark.sql(s"SELECT REPEAT(CAST(value AS STRING), 20) AS c1 FROM $tbl"))
5454

5555
val extraConfigs =
5656
Map(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true") ++ config.extraCometConfigs
@@ -62,23 +62,36 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase {
6262

6363
// Configuration for all string expression benchmarks
6464
private val stringExpressions = List(
65-
StringExprConfig("Substring", "select substring(c1, 1, 100) from parquetV1Table"),
6665
StringExprConfig("ascii", "select ascii(c1) from parquetV1Table"),
67-
StringExprConfig("bitLength", "select bit_length(c1) from parquetV1Table"),
68-
StringExprConfig("octet_length", "select octet_length(c1) from parquetV1Table"),
69-
StringExprConfig("upper", "select upper(c1) from parquetV1Table"),
70-
StringExprConfig("lower", "select lower(c1) from parquetV1Table"),
66+
StringExprConfig("bit_length", "select bit_length(c1) from parquetV1Table"),
7167
StringExprConfig("chr", "select chr(c1) from parquetV1Table"),
68+
StringExprConfig("concat", "select concat(c1, c1) from parquetV1Table"),
69+
StringExprConfig("concat_ws", "select concat_ws(' ', c1, c1) from parquetV1Table"),
70+
StringExprConfig("contains", "select contains(c1, '123') from parquetV1Table"),
71+
StringExprConfig("endswith", "select endswith(c1, '9') from parquetV1Table"),
7272
StringExprConfig("initCap", "select initCap(c1) from parquetV1Table"),
73-
StringExprConfig("trim", "select trim(c1) from parquetV1Table"),
74-
StringExprConfig("concatws", "select concat_ws(' ', c1, c1) from parquetV1Table"),
73+
StringExprConfig("instr", "select instr(c1, '123') from parquetV1Table"),
7574
StringExprConfig("length", "select length(c1) from parquetV1Table"),
75+
StringExprConfig("like", "select c1 like '%123%' from parquetV1Table"),
76+
StringExprConfig("lower", "select lower(c1) from parquetV1Table"),
77+
StringExprConfig("lpad", "select lpad(c1, 150, 'x') from parquetV1Table"),
78+
StringExprConfig("ltrim", "select ltrim(c1) from parquetV1Table"),
79+
StringExprConfig("octet_length", "select octet_length(c1) from parquetV1Table"),
80+
StringExprConfig(
81+
"regexp_replace",
82+
"select regexp_replace(c1, '[0-9]', 'X') from parquetV1Table"),
7683
StringExprConfig("repeat", "select repeat(c1, 3) from parquetV1Table"),
77-
StringExprConfig("reverse", "select reverse(c1) from parquetV1Table"),
78-
StringExprConfig("instr", "select instr(c1, '123') from parquetV1Table"),
7984
StringExprConfig("replace", "select replace(c1, '123', 'ab') from parquetV1Table"),
85+
StringExprConfig("reverse", "select reverse(c1) from parquetV1Table"),
86+
StringExprConfig("rlike", "select c1 rlike '[0-9]+' from parquetV1Table"),
87+
StringExprConfig("rpad", "select rpad(c1, 150, 'x') from parquetV1Table"),
88+
StringExprConfig("rtrim", "select rtrim(c1) from parquetV1Table"),
8089
StringExprConfig("space", "select space(2) from parquetV1Table"),
81-
StringExprConfig("translate", "select translate(c1, '123456', 'aBcDeF') from parquetV1Table"))
90+
StringExprConfig("startswith", "select startswith(c1, '1') from parquetV1Table"),
91+
StringExprConfig("substring", "select substring(c1, 1, 100) from parquetV1Table"),
92+
StringExprConfig("translate", "select translate(c1, '123456', 'aBcDeF') from parquetV1Table"),
93+
StringExprConfig("trim", "select trim(c1) from parquetV1Table"),
94+
StringExprConfig("upper", "select upper(c1) from parquetV1Table"))
8295

8396
override def runCometBenchmark(mainArgs: Array[String]): Unit = {
8497
val values = 1024 * 1024;

0 commit comments

Comments
 (0)