Skip to content

Commit bd7f8eb

Browse files
committed
Address revans2/res-life review: simplify LIKE escape validation
- Replace the StringUtils.escapeLikeRegex call with a focused O(n) escape-character validation that only checks the two invalid cases: the escape char at the end of the pattern, and the escape char followed by a non-special character (anything other than `_`, `%`, or the escape char itself). This avoids building a full regex string during planning.
- Remove the runtime safety net in GpuLike.doColumnar — tagExprForGpu already prevents GpuLike from being created for invalid patterns.
- Remove the now-unused StringUtils import from stringFunctions.scala.

Signed-off-by: Allen Xu <allxu@nvidia.com>
Made-with: Cursor
Signed-off-by: Allen Xu <allxu@nvidia.com>
1 parent 0e1b4ad commit bd7f8eb

File tree

2 files changed

+25
-22
lines changed

2 files changed

+25
-22
lines changed

sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3502,20 +3502,31 @@ object GpuOverrides extends Logging {
35023502
("search", TypeSig.lit(TypeEnum.STRING), TypeSig.STRING)),
35033503
(a, conf, p, r) => new BinaryExprMeta[Like](a, conf, p, r) {
35043504
override def tagExprForGpu(): Unit = {
3505-
import org.apache.spark.sql.catalyst.util.StringUtils
3506-
try {
3507-
a.right match {
3508-
case l: Literal
3509-
if l.value.isInstanceOf[UTF8String] =>
3510-
StringUtils.escapeLikeRegex(
3511-
l.value.toString, a.escapeChar)
3512-
case _ =>
3513-
}
3514-
} catch {
3515-
case NonFatal(e) =>
3516-
willNotWorkOnGpu(
3517-
s"invalid LIKE escape pattern: " +
3518-
s"${e.getMessage}")
3505+
a.right match {
3506+
case Literal(v: UTF8String, _) =>
3507+
val pattern = v.toString
3508+
val esc = a.escapeChar
3509+
var i = 0
3510+
while (i < pattern.length) {
3511+
if (pattern.charAt(i) == esc) {
3512+
val j = i + 1
3513+
if (j >= pattern.length) {
3514+
willNotWorkOnGpu(
3515+
"invalid LIKE escape pattern")
3516+
return
3517+
}
3518+
val c = pattern.charAt(j)
3519+
if (c != '_' && c != '%' && c != esc) {
3520+
willNotWorkOnGpu(
3521+
"invalid LIKE escape pattern")
3522+
return
3523+
}
3524+
i = j + 1
3525+
} else {
3526+
i += 1
3527+
}
3528+
}
3529+
case _ =>
35193530
}
35203531
}
35213532
override def convertToGpu(lhs: Expression, rhs: Expression): GpuExpression =

sql-plugin/src/main/scala/org/apache/spark/sql/rapids/stringFunctions.scala

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ import com.nvidia.spark.rapids.jni.RegexRewriteUtils
3636
import com.nvidia.spark.rapids.shims.{NullIntolerantShim, ShimExpression, SparkShimImpl}
3737

3838
import org.apache.spark.sql.catalyst.expressions._
39-
import org.apache.spark.sql.catalyst.util.StringUtils
4039
import org.apache.spark.sql.errors.ConvUtils
4140
import org.apache.spark.sql.rapids.catalyst.expressions._
4241
import org.apache.spark.sql.types._
@@ -977,19 +976,12 @@ case class GpuLike(left: Expression, right: Expression, escapeChar: Char)
977976

978977
def this(left: Expression, right: Expression) = this(left, right, '\\')
979978

980-
@transient private var escapeValidated = false
981-
982979
override def toString: String = escapeChar match {
983980
case '\\' => s"$left gpulike $right"
984981
case c => s"$left gpulike $right ESCAPE '$c'"
985982
}
986983

987984
override def doColumnar(lhs: GpuColumnVector, rhs: GpuScalar): ColumnVector = {
988-
if (!escapeValidated && rhs.isValid) {
989-
StringUtils.escapeLikeRegex(
990-
rhs.getValue.toString, escapeChar)
991-
escapeValidated = true
992-
}
993985
withResource(Scalar.fromString(Character.toString(escapeChar))) { escapeScalar =>
994986
lhs.getBase.like(rhs.getBase, escapeScalar)
995987
}

0 commit comments

Comments
 (0)