@@ -35,13 +35,13 @@ case class StringExprConfig(
3535 query : String ,
3636 extraCometConfigs : Map [String , String ] = Map .empty)
3737
38- // spotless:off
3938/**
4039 * Benchmark to measure performance of Comet string expressions. To run this benchmark:
41- * `SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometStringExpressionBenchmark`
40+ * {{{
41+ * SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometStringExpressionBenchmark
42+ * }}}
4243 * Results will be written to "spark/benchmarks/CometStringExpressionBenchmark-**results.txt".
4344 */
44- // spotless:on
4545object CometStringExpressionBenchmark extends CometBenchmarkBase {
4646
4747 /**
@@ -50,7 +50,7 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase {
5050 def runStringExprBenchmark (config : StringExprConfig , values : Int ): Unit = {
5151 withTempPath { dir =>
5252 withTempTable(" parquetV1Table" ) {
53- prepareTable(dir, spark.sql(s " SELECT REPEAT(CAST(value AS STRING), 100 ) AS c1 FROM $tbl" ))
53+ prepareTable(dir, spark.sql(s " SELECT REPEAT(CAST(value AS STRING), 20 ) AS c1 FROM $tbl" ))
5454
5555 val extraConfigs =
5656 Map (CometConf .COMET_CASE_CONVERSION_ENABLED .key -> " true" ) ++ config.extraCometConfigs
@@ -62,23 +62,36 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase {
6262
6363 // Configuration for all string expression benchmarks: one (name, SQL query) entry per expression, listed alphabetically by name
6464 private val stringExpressions = List (
65- StringExprConfig (" Substring" , " select substring(c1, 1, 100) from parquetV1Table" ),
6665 StringExprConfig (" ascii" , " select ascii(c1) from parquetV1Table" ),
67- StringExprConfig (" bitLength" , " select bit_length(c1) from parquetV1Table" ),
68- StringExprConfig (" octet_length" , " select octet_length(c1) from parquetV1Table" ),
69- StringExprConfig (" upper" , " select upper(c1) from parquetV1Table" ),
70- StringExprConfig (" lower" , " select lower(c1) from parquetV1Table" ),
66+ StringExprConfig (" bit_length" , " select bit_length(c1) from parquetV1Table" ),
7167 StringExprConfig (" chr" , " select chr(c1) from parquetV1Table" ),
68+ StringExprConfig (" concat" , " select concat(c1, c1) from parquetV1Table" ),
69+ StringExprConfig (" concat_ws" , " select concat_ws(' ', c1, c1) from parquetV1Table" ),
70+ StringExprConfig (" contains" , " select contains(c1, '123') from parquetV1Table" ),
71+ StringExprConfig (" endswith" , " select endswith(c1, '9') from parquetV1Table" ),
7272 StringExprConfig (" initCap" , " select initCap(c1) from parquetV1Table" ),
73- StringExprConfig (" trim" , " select trim(c1) from parquetV1Table" ),
74- StringExprConfig (" concatws" , " select concat_ws(' ', c1, c1) from parquetV1Table" ),
73+ StringExprConfig (" instr" , " select instr(c1, '123') from parquetV1Table" ),
7574 StringExprConfig (" length" , " select length(c1) from parquetV1Table" ),
75+ StringExprConfig (" like" , " select c1 like '%123%' from parquetV1Table" ),
76+ StringExprConfig (" lower" , " select lower(c1) from parquetV1Table" ),
77+ StringExprConfig (" lpad" , " select lpad(c1, 150, 'x') from parquetV1Table" ),
78+ StringExprConfig (" ltrim" , " select ltrim(c1) from parquetV1Table" ),
79+ StringExprConfig (" octet_length" , " select octet_length(c1) from parquetV1Table" ),
80+ StringExprConfig (
81+ " regexp_replace" ,
82+ " select regexp_replace(c1, '[0-9]', 'X') from parquetV1Table" ),
7683 StringExprConfig (" repeat" , " select repeat(c1, 3) from parquetV1Table" ),
77- StringExprConfig (" reverse" , " select reverse(c1) from parquetV1Table" ),
78- StringExprConfig (" instr" , " select instr(c1, '123') from parquetV1Table" ),
7984 StringExprConfig (" replace" , " select replace(c1, '123', 'ab') from parquetV1Table" ),
85+ StringExprConfig (" reverse" , " select reverse(c1) from parquetV1Table" ),
86+ StringExprConfig (" rlike" , " select c1 rlike '[0-9]+' from parquetV1Table" ),
87+ StringExprConfig (" rpad" , " select rpad(c1, 150, 'x') from parquetV1Table" ),
88+ StringExprConfig (" rtrim" , " select rtrim(c1) from parquetV1Table" ),
8089 StringExprConfig (" space" , " select space(2) from parquetV1Table" ),
81- StringExprConfig (" translate" , " select translate(c1, '123456', 'aBcDeF') from parquetV1Table" ))
90+ StringExprConfig (" startswith" , " select startswith(c1, '1') from parquetV1Table" ),
91+ StringExprConfig (" substring" , " select substring(c1, 1, 100) from parquetV1Table" ),
92+ StringExprConfig (" translate" , " select translate(c1, '123456', 'aBcDeF') from parquetV1Table" ),
93+ StringExprConfig (" trim" , " select trim(c1) from parquetV1Table" ),
94+ StringExprConfig (" upper" , " select upper(c1) from parquetV1Table" ))
8295
8396 override def runCometBenchmark (mainArgs : Array [String ]): Unit = {
8497 val values = 1024 * 1024 ;
0 commit comments