Skip to content
This repository was archived by the owner on Jan 9, 2020. It is now read-only.

Commit 8be7e6b

Browse files
maropugatorsmile
authored andcommitted
[SPARK-21973][SQL] Add an new option to filter queries in TPC-DS
## What changes were proposed in this pull request? This pr added a new option to filter TPC-DS queries to run in `TPCDSQueryBenchmark`. By default, `TPCDSQueryBenchmark` runs all the TPC-DS queries. This change could enable developers to run some of the TPC-DS queries by this option, e.g., to run q2, q4, and q6 only: ``` spark-submit --class <this class> --conf spark.sql.tpcds.queryFilter="q2,q4,q6" --jars <spark sql test jar> ``` ## How was this patch tested? Manually checked. Author: Takeshi Yamamuro <[email protected]> Closes apache#19188 from maropu/RunPartialQueriesInTPCDS.
1 parent 17edfec commit 8be7e6b

File tree

2 files changed

+35
-5
lines changed

2 files changed

+35
-5
lines changed

sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.spark.sql.execution.benchmark
1919

2020
import org.apache.spark.SparkConf
21+
import org.apache.spark.internal.Logging
2122
import org.apache.spark.sql.SparkSession
2223
import org.apache.spark.sql.catalyst.TableIdentifier
2324
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
@@ -29,9 +30,9 @@ import org.apache.spark.util.Benchmark
2930
/**
3031
* Benchmark to measure TPCDS query performance.
3132
* To run this:
32-
* spark-submit --class <this class> <spark sql test jar> <TPCDS data location>
33+
* spark-submit --class <this class> <spark sql test jar> --data-location <TPCDS data location>
3334
*/
34-
object TPCDSQueryBenchmark {
35+
object TPCDSQueryBenchmark extends Logging {
3536
val conf =
3637
new SparkConf()
3738
.setMaster("local[1]")
@@ -90,7 +91,9 @@ object TPCDSQueryBenchmark {
9091
benchmark.addCase(name) { i =>
9192
spark.sql(queryString).collect()
9293
}
94+
logInfo(s"\n\n===== TPCDS QUERY BENCHMARK OUTPUT FOR $name =====\n")
9395
benchmark.run()
96+
logInfo(s"\n\n===== FINISHED $name =====\n")
9497
}
9598
}
9699

@@ -110,6 +113,20 @@ object TPCDSQueryBenchmark {
110113
"q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90",
111114
"q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")
112115

113-
tpcdsAll(benchmarkArgs.dataLocation, queries = tpcdsQueries)
116+
// If `--query-filter` defined, filters the queries that this option selects
117+
val queriesToRun = if (benchmarkArgs.queryFilter.nonEmpty) {
118+
val queries = tpcdsQueries.filter { case queryName =>
119+
benchmarkArgs.queryFilter.contains(queryName)
120+
}
121+
if (queries.isEmpty) {
122+
throw new RuntimeException(
123+
s"Empty queries to run. Bad query name filter: ${benchmarkArgs.queryFilter}")
124+
}
125+
queries
126+
} else {
127+
tpcdsQueries
128+
}
129+
130+
tpcdsAll(benchmarkArgs.dataLocation, queries = queriesToRun)
114131
}
115132
}

sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,33 @@
1717

1818
package org.apache.spark.sql.execution.benchmark
1919

20+
import java.util.Locale
21+
22+
2023
class TPCDSQueryBenchmarkArguments(val args: Array[String]) {
2124
var dataLocation: String = null
25+
var queryFilter: Set[String] = Set.empty
2226

2327
parseArgs(args.toList)
2428
validateArguments()
2529

30+
private def optionMatch(optionName: String, s: String): Boolean = {
31+
optionName == s.toLowerCase(Locale.ROOT)
32+
}
33+
2634
private def parseArgs(inputArgs: List[String]): Unit = {
2735
var args = inputArgs
2836

29-
while(args.nonEmpty) {
37+
while (args.nonEmpty) {
3038
args match {
31-
case ("--data-location") :: value :: tail =>
39+
case optName :: value :: tail if optionMatch("--data-location", optName) =>
3240
dataLocation = value
3341
args = tail
3442

43+
case optName :: value :: tail if optionMatch("--query-filter", optName) =>
44+
queryFilter = value.toLowerCase(Locale.ROOT).split(",").map(_.trim).toSet
45+
args = tail
46+
3547
case _ =>
3648
// scalastyle:off println
3749
System.err.println("Unknown/unsupported param " + args)
@@ -47,6 +59,7 @@ class TPCDSQueryBenchmarkArguments(val args: Array[String]) {
4759
|Usage: spark-submit --class <this class> <spark sql test jar> [Options]
4860
|Options:
4961
| --data-location Path to TPCDS data
62+
| --query-filter Queries to filter, e.g., q3,q5,q13
5063
|
5164
|------------------------------------------------------------------------------------------------------------------
5265
|In order to run this benchmark, please follow the instructions at

0 commit comments

Comments
 (0)