Skip to content

Commit 0f98a3c

Browse files
author
Kazantsev Maksim
committed
Add benchmark test
1 parent 4b02dd6 commit 0f98a3c

File tree

2 files changed

+87
-0
lines changed

2 files changed

+87
-0
lines changed

docs/source/user-guide/latest/configs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,7 @@ These settings can be used to determine which parts of the plan are accelerated
324324
| `spark.comet.expression.StringTrimBoth.enabled` | Enable Comet acceleration for `StringTrimBoth` | true |
325325
| `spark.comet.expression.StringTrimLeft.enabled` | Enable Comet acceleration for `StringTrimLeft` | true |
326326
| `spark.comet.expression.StringTrimRight.enabled` | Enable Comet acceleration for `StringTrimRight` | true |
327+
| `spark.comet.expression.StructsToCsv.enabled` | Enable Comet acceleration for `StructsToCsv` | true |
327328
| `spark.comet.expression.StructsToJson.enabled` | Enable Comet acceleration for `StructsToJson` | true |
328329
| `spark.comet.expression.Substring.enabled` | Enable Comet acceleration for `Substring` | true |
329330
| `spark.comet.expression.Subtract.enabled` | Enable Comet acceleration for `Subtract` | true |
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.spark.sql.benchmark
21+
22+
import org.apache.spark.sql.benchmark.CometJsonExpressionBenchmark.{prepareTable, runExpressionBenchmark, withTempPath, withTempTable}
23+
import org.apache.spark.sql.benchmark.CometStringExpressionBenchmark.{spark, tbl}
24+
import org.apache.spark.sql.catalyst.expressions.{CsvToStructs, JsonToStructs}
25+
26+
import org.apache.comet.CometConf
27+
28+
/**
 * Describes a single CSV expression benchmark case.
 *
 * @param name
 *   Label identifying the benchmark case
 * @param query
 *   SQL text that the benchmark executes
 * @param extraCometConfigs
 *   Extra configuration entries appended to the settings handed to the benchmark run; empty
 *   unless the caller supplies some
 */
case class CsvExprConfig(
    name: String,
    query: String,
    extraCometConfigs: Map[String, String] = Map.empty[String, String])
42+
43+
// spotless:off
/**
 * Benchmark to measure performance of Comet CSV expressions. To run this benchmark:
 * `SPARK_GENERATE_BENCHMARK_FILES=1 make
 * benchmark-org.apache.spark.sql.benchmark.CometCsvExpressionBenchmark` Results will be written
 * to "spark/benchmarks/CometCsvExpressionBenchmark-**results.txt".
 */
// spotless:on
object CometCsvExpressionBenchmark extends CometBenchmarkBase {

  // `to_csv` is planned by Spark as `StructsToCsv`; imported locally so the allow-incompatible
  // key below is built for the expression that the benchmark query actually exercises.
  import org.apache.spark.sql.catalyst.expressions.StructsToCsv

  /**
   * Generic method to run a CSV expression benchmark with the given configuration.
   *
   * Builds a temporary `parquetV1Table` with a string (`c1`), an int (`c2`) and a long (`c3`)
   * column derived from `value`, then delegates to `runExpressionBenchmark` with the
   * allow-incompatible switch for `StructsToCsv` enabled plus any per-case extra configs.
   *
   * @param config
   *   Benchmark case: its name, the SQL query to run and extra Comet configs
   * @param values
   *   Forwarded unchanged to `runExpressionBenchmark` (presumably the row count of the
   *   generated table -- confirm against `CometBenchmarkBase`)
   */
  def runCsvExprBenchmark(config: CsvExprConfig, values: Int): Unit = {
    withTempPath { dir =>
      withTempTable("parquetV1Table") {
        prepareTable(
          dir,
          spark.sql(
            s"SELECT CAST(value AS STRING) AS c1, CAST(value AS INT) AS c2, CAST(value AS LONG) AS c3 FROM $tbl"))

        // Per-case configs are appended last, so they override the default on a key clash.
        val extraConfigs = Map(
          CometConf.getExprAllowIncompatConfigKey(
            classOf[StructsToCsv]) -> "true") ++ config.extraCometConfigs

        runExpressionBenchmark(config.name, values, config.query, extraConfigs)
      }
    }
  }

  // Configuration for all CSV expression benchmarks
  private val csvExpressions = List(
    CsvExprConfig("to_csv", "SELECT to_csv(struct(c1, c2, c3)) FROM parquetV1Table"))

  /**
   * Entry point invoked by the benchmark harness: runs every configured CSV expression case
   * with `1024 * 1024` as the `values` argument.
   */
  override def runCometBenchmark(args: Array[String]): Unit = {
    val values = 1024 * 1024

    csvExpressions.foreach { config =>
      runBenchmarkWithTable(config.name, values) { value =>
        runCsvExprBenchmark(config, value)
      }
    }
  }
}

0 commit comments

Comments
 (0)