
Commit 7e3de64

beliefer authored and MaxGekk committed
[SPARK-50792][SQL][FOLLOWUP] Improve the push down information for binary
### What changes were proposed in this pull request?
This PR proposes to improve the push down information for binary.

### Why are the changes needed?
Before this PR, the push down information for binary looked like below:
`PushedFilters: [binary_col IS NOT NULL, binary_col = [B6e5af973]`

After this PR, the push down information will be:
`PushedFilters: [binary_col IS NOT NULL, binary_col = X'123456']`

### Does this PR introduce _any_ user-facing change?
'Yes'. Fix the bug.

### How was this patch tested?
GA

### Was this patch authored or co-authored using generative AI tooling?
'No'.

Closes #49555 from beliefer/SPARK-50792_followup.

Authored-by: beliefer <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
1 parent 4021d91 commit 7e3de64
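
As background, here is a minimal, self-contained sketch (not part of this commit) of the rendering the new `LiteralValue.toString` branch applies to binary literals. It assumes only Apache Commons Codec on the classpath and uses the bytes 0x12, 0x34, 0x56 from the test updated below; the object name is illustrative only.

```scala
import org.apache.commons.codec.binary.Hex

// Illustrative only: the before/after rendering of a binary literal's bytes.
object BinaryPushDownRendering {
  def main(args: Array[String]): Unit = {
    val bytes: Array[Byte] = Array(0x12, 0x34, 0x56).map(_.toByte)

    // Before the fix: Array[Byte] falls back to the JVM's default toString,
    // producing an opaque identity string like the `[B...` value quoted above.
    println(bytes.toString)

    // After the fix: the bytes are hex-encoded, matching the `0x123456` form
    // that the updated JDBCV2Suite test expects; `false` requests upper-case digits.
    println("0x" + Hex.encodeHexString(bytes, false))
  }
}
```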

2 files changed (+13, -14 lines)

sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala

Lines changed: 9 additions & 7 deletions
@@ -17,12 +17,13 @@
 
 package org.apache.spark.sql.connector.expressions
 
+import org.apache.commons.codec.binary.Hex
 import org.apache.commons.lang3.StringUtils
 
 import org.apache.spark.SparkException
 import org.apache.spark.sql.catalyst
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
-import org.apache.spark.sql.types.{DataType, IntegerType, StringType}
+import org.apache.spark.sql.types.{BinaryType, DataType, IntegerType, StringType}
 import org.apache.spark.util.ArrayImplicits._
 
 /**
@@ -388,12 +389,13 @@ private[sql] object HoursTransform {
 }
 
 private[sql] final case class LiteralValue[T](value: T, dataType: DataType) extends Literal[T] {
-  override def toString: String = {
-    if (dataType.isInstanceOf[StringType]) {
-      s"'${StringUtils.replace(s"$value", "'", "''")}'"
-    } else {
-      s"$value"
-    }
+  override def toString: String = dataType match {
+    case StringType => s"'${StringUtils.replace(s"$value", "'", "''")}'"
+    case BinaryType =>
+      assert(value.isInstanceOf[Array[Byte]])
+      val bytes = value.asInstanceOf[Array[Byte]]
+      "0x" + Hex.encodeHexString(bytes, false)
+    case _ => s"$value"
   }
 }
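
For readers unfamiliar with `LiteralValue`, a short usage sketch (not part of the commit) of how the rewritten `toString` behaves per data type; in practice Spark constructs these values itself during filter push-down, so this is only a demonstration under that caveat.

```scala
import org.apache.spark.sql.connector.expressions.LiteralValue
import org.apache.spark.sql.types.{BinaryType, StringType}

object LiteralValueToStringDemo {
  def main(args: Array[String]): Unit = {
    // String literals keep the existing behaviour: quoted, with single quotes doubled.
    println(LiteralValue("O'Brien", StringType))   // 'O''Brien'

    // Binary literals are now rendered as hex instead of the default Array[Byte] toString.
    val bytes: Array[Byte] = Array(0x12, 0x34, 0x56).map(_.toByte)
    println(LiteralValue(bytes, BinaryType))       // 0x123456
  }
}
```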

sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala

Lines changed: 4 additions & 7 deletions
@@ -3107,13 +3107,10 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel
       sql(s"CREATE TABLE $tableName (binary_col BINARY)")
       sql(s"INSERT INTO $tableName VALUES ($binary)")
 
-      val select = s"SELECT * FROM $tableName WHERE binary_col = $binary"
-      val df = sql(select)
-      val filter = df.queryExecution.optimizedPlan.collect {
-        case f: Filter => f
-      }
-      assert(filter.isEmpty, "Filter is not pushed")
-      assert(df.collect().length === 1, s"Binary literal test failed: $select")
+      val df = sql(s"SELECT * FROM $tableName WHERE binary_col = $binary")
+      checkFiltersRemoved(df)
+      checkPushedInfo(df, "PushedFilters: [binary_col IS NOT NULL, binary_col = 0x123456]")
+      checkAnswer(df, Row(Array(18, 52, 86)))
     }
   }
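
A side note on the expected answer: assuming the `$binary` literal defined earlier in the test is `X'123456'` (its definition is not shown in this hunk), the bytes 0x12, 0x34 and 0x56 are 18, 52 and 86 in decimal, which is why `checkAnswer` expects `Row(Array(18, 52, 86))`. A tiny sketch of that conversion:

```scala
// Illustrative only: decode the hex digits "123456" into their decimal byte values.
object BinaryLiteralBytes {
  def main(args: Array[String]): Unit = {
    val decoded = "123456".grouped(2).map(Integer.parseInt(_, 16)).toSeq
    println(decoded.mkString(", "))  // 18, 52, 86
  }
}
```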
