Skip to content

Commit 808b3ac

Browse files
wayneguowdongjoon-hyun
authored andcommitted
[SPARK-50666][SQL] Support hint for reading in JDBC data source
### What changes were proposed in this pull request? This PR aims to add a hint option for JDBC data source. This option is used to specify the hint for reading. It will apply only if the underlying DBMS supports the hint feature. Currently, this option is only supported by OracleDialect and MySQLDialect. ### Why are the changes needed? It's useful for performance tuning when reading from DBMS. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Passed GA and add a new test case. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49564 from wayneguow/jdbc_hint. Authored-by: Wei Guo <guow93@gmail.com> Signed-off-by: Dongjoon Hyun <dongjoon@apache.org> (cherry picked from commit fef1b23) Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
1 parent 4c00d63 commit 808b3ac

File tree

14 files changed

+138
-8
lines changed

14 files changed

+138
-8
lines changed

common/utils/src/main/resources/error/error-conditions.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,6 +1701,12 @@
17011701
],
17021702
"sqlState" : "42822"
17031703
},
1704+
"HINT_UNSUPPORTED_FOR_JDBC_DIALECT" : {
1705+
"message" : [
1706+
"The option `hint` is not supported for <jdbcDialect> in JDBC data source. Supported dialects are `MySQLDialect`, `OracleDialect` and `DatabricksDialect`."
1707+
],
1708+
"sqlState" : "42822"
1709+
},
17041710
"HLL_INVALID_INPUT_SKETCH_BUFFER" : {
17051711
"message" : [
17061712
"Invalid call to <function>; only valid HLL sketch buffers are supported as inputs (such as those produced by the `hll_sketch_agg` function)."

docs/sql-data-sources-jdbc.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,14 @@ logging into the data sources.
374374
</td>
375375
<td>read</td>
376376
</tr>
377+
<tr>
378+
<td><code>hint</code></td>
379+
<td>(none)</td>
380+
<td>
381+
This option is used to specify the hint for reading. The supported hint format is a variant of C-style comments: it needs to start with `/*+ ` and end with ` */`. Currently, this option is only supported in MySQLDialect, OracleDialect and DatabricksDialect.
382+
</td>
383+
<td>read</td>
384+
</tr>
377385
</table>
378386

379387
Note that kerberos authentication with keytab is not always supported by the JDBC driver.<br>

pom.xml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,9 @@
334334
<db2.jcc.version>11.5.9.0</db2.jcc.version>
335335
<mssql.jdbc.version>12.8.1.jre11</mssql.jdbc.version>
336336
<ojdbc17.version>23.6.0.24.10</ojdbc17.version>
337+
<databricks.jdbc.version>2.7.1</databricks.jdbc.version>
338+
<snowflake.jdbc.version>3.21.0</snowflake.jdbc.version>
339+
<terajdbc.version>20.00.00.39</terajdbc.version>
337340
<!-- Used for SBT build to retrieve the Spark version -->
338341
<spark.version>${project.version}</spark.version>
339342
</properties>
@@ -1350,6 +1353,24 @@
13501353
<version>${ojdbc17.version}</version>
13511354
<scope>test</scope>
13521355
</dependency>
1356+
<dependency>
1357+
<groupId>com.databricks</groupId>
1358+
<artifactId>databricks-jdbc</artifactId>
1359+
<version>${databricks.jdbc.version}</version>
1360+
<scope>test</scope>
1361+
</dependency>
1362+
<dependency>
1363+
<groupId>net.snowflake</groupId>
1364+
<artifactId>snowflake-jdbc</artifactId>
1365+
<version>${snowflake.jdbc.version}</version>
1366+
<scope>test</scope>
1367+
</dependency>
1368+
<dependency>
1369+
<groupId>com.teradata.jdbc</groupId>
1370+
<artifactId>terajdbc</artifactId>
1371+
<version>${terajdbc.version}</version>
1372+
<scope>test</scope>
1373+
</dependency>
13531374
<dependency>
13541375
<groupId>org.apache.curator</groupId>
13551376
<artifactId>curator-recipes</artifactId>

sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -980,6 +980,12 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
980980
messageParameters = Map("content" -> content))
981981
}
982982

983+
def hintUnsupportedForJdbcDialectError(jdbcDialect: String): SparkIllegalArgumentException = {
984+
new SparkIllegalArgumentException(
985+
errorClass = "HINT_UNSUPPORTED_FOR_JDBC_DIALECT",
986+
messageParameters = Map("jdbcDialect" -> jdbcDialect))
987+
}
988+
983989
def unsupportedArrayElementTypeBasedOnBinaryError(dt: DataType): SparkIllegalArgumentException = {
984990
new SparkIllegalArgumentException(
985991
errorClass = "_LEGACY_ERROR_TEMP_2084",

sql/core/pom.xml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,21 @@
222222
<artifactId>derbytools</artifactId>
223223
<scope>test</scope>
224224
</dependency>
225+
<dependency>
226+
<groupId>com.databricks</groupId>
227+
<artifactId>databricks-jdbc</artifactId>
228+
<scope>test</scope>
229+
</dependency>
230+
<dependency>
231+
<groupId>net.snowflake</groupId>
232+
<artifactId>snowflake-jdbc</artifactId>
233+
<scope>test</scope>
234+
</dependency>
235+
<dependency>
236+
<groupId>com.teradata.jdbc</groupId>
237+
<artifactId>terajdbc</artifactId>
238+
<scope>test</scope>
239+
</dependency>
225240
<dependency>
226241
<groupId>org.apache.parquet</groupId>
227242
<artifactId>parquet-avro</artifactId>

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,15 @@ class JDBCOptions(
249249
.map(_.toBoolean)
250250
.getOrElse(SQLConf.get.timestampType == TimestampNTZType)
251251

252+
val hint = {
253+
parameters.get(JDBC_HINT_STRING).map(value => {
254+
require(value.matches("(?s)^/\\*\\+ .* \\*/$"),
255+
s"Invalid value `$value` for option `$JDBC_HINT_STRING`." +
256+
s" It should start with `/*+ ` and end with ` */`.")
257+
s"$value "
258+
}).getOrElse("")
259+
}
260+
252261
override def hashCode: Int = this.parameters.hashCode()
253262

254263
override def equals(other: Any): Boolean = other match {
@@ -321,4 +330,5 @@ object JDBCOptions {
321330
val JDBC_CONNECTION_PROVIDER = newOption("connectionProvider")
322331
val JDBC_PREPARE_QUERY = newOption("prepareQuery")
323332
val JDBC_PREFER_TIMESTAMP_NTZ = newOption("preferTimestampNTZ")
333+
val JDBC_HINT_STRING = newOption("hint")
324334
}

sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ private case class DB2Dialect() extends JdbcDialect with SQLConfHelper with NoLe
207207
val offsetClause = dialect.getOffsetClause(offset)
208208

209209
options.prepareQuery +
210-
s"SELECT $columnList FROM ${options.tableOrQuery} $tableSampleClause" +
210+
s"SELECT $hintClause$columnList FROM ${options.tableOrQuery} $tableSampleClause" +
211211
s" $whereClause $groupByClause $orderByClause $offsetClause $limitClause"
212212
}
213213
}

sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ private case class DatabricksDialect() extends JdbcDialect with NoLegacyJDBCErro
6363
s"TABLESAMPLE (${(sample.upperBound - sample.lowerBound) * 100}) REPEATABLE (${sample.seed})"
6464
}
6565

66+
override def supportsHint: Boolean = true
67+
6668
// Override listSchemas to run "show schemas" as a PreparedStatement instead of
6769
// invoking getMetaData.getSchemas as it may not work correctly in older versions of the driver.
6870
override def schemasExists(conn: Connection, options: JDBCOptions, schema: String): Boolean = {

sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,8 @@ abstract class JdbcDialect extends Serializable with Logging {
826826
def getTableSample(sample: TableSampleInfo): String =
827827
throw new SparkUnsupportedOperationException("_LEGACY_ERROR_TEMP_3183")
828828

829+
def supportsHint: Boolean = false
830+
829831
/**
830832
* Return the DB-specific quoted and fully qualified table name
831833
*/

sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcSQLQueryBuilder.scala

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.spark.sql.jdbc
1919

2020
import org.apache.spark.sql.connector.expressions.filter.Predicate
21+
import org.apache.spark.sql.errors.QueryExecutionErrors
2122
import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JDBCPartition}
2223
import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo
2324

@@ -67,6 +68,18 @@ class JdbcSQLQueryBuilder(dialect: JdbcDialect, options: JDBCOptions) {
6768
*/
6869
protected var tableSampleClause: String = ""
6970

71+
/**
72+
* A hint sample clause representing query hints.
73+
*/
74+
protected val hintClause: String = {
75+
if (options.hint == "" || dialect.supportsHint) {
76+
options.hint
77+
} else {
78+
throw QueryExecutionErrors.hintUnsupportedForJdbcDialectError(
79+
dialect.getClass.getSimpleName)
80+
}
81+
}
82+
7083
/**
7184
* The columns names that following dialect's SQL syntax.
7285
* e.g. The column name is the raw name or quoted name.
@@ -161,7 +174,7 @@ class JdbcSQLQueryBuilder(dialect: JdbcDialect, options: JDBCOptions) {
161174
val offsetClause = dialect.getOffsetClause(offset)
162175

163176
options.prepareQuery +
164-
s"SELECT $columnList FROM ${options.tableOrQuery} $tableSampleClause" +
177+
s"SELECT $hintClause$columnList FROM ${options.tableOrQuery} $tableSampleClause" +
165178
s" $whereClause $groupByClause $orderByClause $limitClause $offsetClause"
166179
}
167180
}

0 commit comments

Comments
 (0)