Skip to content

Commit e0d85ae

Browse files
kazantsev-maksim and Kazantsev Maksim authored
Move string expressions in separate file (#1792)
Co-authored-by: Kazantsev Maksim <[email protected]>
1 parent 2665b52 commit e0d85ae

File tree

4 files changed

+558
-319
lines changed

4 files changed

+558
-319
lines changed

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 40 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -1427,11 +1427,8 @@ object QueryPlanSerde extends Logging with CometExprShim {
14271427
val optExpr = scalarFunctionExprToProto("tan", childExpr)
14281428
optExprWithInfo(optExpr, expr, child)
14291429

1430-
case Ascii(child) =>
1431-
val castExpr = Cast(child, StringType)
1432-
val childExpr = exprToProtoInternal(castExpr, inputs, binding)
1433-
val optExpr = scalarFunctionExprToProto("ascii", childExpr)
1434-
optExprWithInfo(optExpr, expr, castExpr)
1430+
case _: Ascii =>
1431+
CometAscii.convert(expr, inputs, binding)
14351432

14361433
case Expm1(child) =>
14371434
val childExpr = exprToProtoInternal(child, inputs, binding)
@@ -1487,11 +1484,8 @@ object QueryPlanSerde extends Logging with CometExprShim {
14871484
None
14881485
}
14891486

1490-
case BitLength(child) =>
1491-
val castExpr = Cast(child, StringType)
1492-
val childExpr = exprToProtoInternal(castExpr, inputs, binding)
1493-
val optExpr = scalarFunctionExprToProto("bit_length", childExpr)
1494-
optExprWithInfo(optExpr, expr, castExpr)
1487+
case _: BitLength =>
1488+
CometBitLength.convert(expr, inputs, binding)
14951489

14961490
case If(predicate, trueValue, falseValue) =>
14971491
val predicateExpr = exprToProtoInternal(predicate, inputs, binding)
@@ -1546,41 +1540,17 @@ object QueryPlanSerde extends Logging with CometExprShim {
15461540
withInfo(expr, allBranches: _*)
15471541
None
15481542
}
1549-
case ConcatWs(children) =>
1550-
var childExprs: Seq[Expression] = Seq()
1551-
val exprs = children.map(e => {
1552-
val castExpr = Cast(e, StringType)
1553-
childExprs = childExprs :+ castExpr
1554-
exprToProtoInternal(castExpr, inputs, binding)
1555-
})
1556-
val optExpr = scalarFunctionExprToProto("concat_ws", exprs: _*)
1557-
optExprWithInfo(optExpr, expr, childExprs: _*)
1543+
case _: ConcatWs =>
1544+
CometConcatWs.convert(expr, inputs, binding)
15581545

1559-
case Chr(child) =>
1560-
val childExpr = exprToProtoInternal(child, inputs, binding)
1561-
val optExpr = scalarFunctionExprToProto("chr", childExpr)
1562-
optExprWithInfo(optExpr, expr, child)
1546+
case _: Chr =>
1547+
CometChr.convert(expr, inputs, binding)
15631548

1564-
case InitCap(child) =>
1565-
if (CometConf.COMET_EXEC_INITCAP_ENABLED.get()) {
1566-
val castExpr = Cast(child, StringType)
1567-
val childExpr = exprToProtoInternal(castExpr, inputs, binding)
1568-
val optExpr = scalarFunctionExprToProto("initcap", childExpr)
1569-
optExprWithInfo(optExpr, expr, castExpr)
1570-
} else {
1571-
withInfo(
1572-
expr,
1573-
"Comet initCap is not compatible with Spark yet. " +
1574-
"See https://github.com/apache/datafusion-comet/issues/1052 ." +
1575-
s"Set ${CometConf.COMET_EXEC_INITCAP_ENABLED.key}=true to enable it anyway.")
1576-
None
1577-
}
1549+
case _: InitCap =>
1550+
CometInitCap.convert(expr, inputs, binding)
15781551

1579-
case Length(child) =>
1580-
val castExpr = Cast(child, StringType)
1581-
val childExpr = exprToProtoInternal(castExpr, inputs, binding)
1582-
val optExpr = scalarFunctionExprToProto("length", childExpr)
1583-
optExprWithInfo(optExpr, expr, castExpr)
1552+
case _: Length =>
1553+
CometLength.convert(expr, inputs, binding)
15841554

15851555
case Md5(child) =>
15861556
val childExpr = exprToProtoInternal(child, inputs, binding)
@@ -1599,83 +1569,35 @@ object QueryPlanSerde extends Logging with CometExprShim {
15991569
val optExpr = scalarFunctionExprToProto("reverse", childExpr)
16001570
optExprWithInfo(optExpr, expr, castExpr)
16011571

1602-
case StringInstr(str, substr) =>
1603-
val leftCast = Cast(str, StringType)
1604-
val rightCast = Cast(substr, StringType)
1605-
val leftExpr = exprToProtoInternal(leftCast, inputs, binding)
1606-
val rightExpr = exprToProtoInternal(rightCast, inputs, binding)
1607-
val optExpr = scalarFunctionExprToProto("strpos", leftExpr, rightExpr)
1608-
optExprWithInfo(optExpr, expr, leftCast, rightCast)
1609-
1610-
case StringRepeat(str, times) =>
1611-
val leftCast = Cast(str, StringType)
1612-
val rightCast = Cast(times, LongType)
1613-
val leftExpr = exprToProtoInternal(leftCast, inputs, binding)
1614-
val rightExpr = exprToProtoInternal(rightCast, inputs, binding)
1615-
val optExpr = scalarFunctionExprToProto("repeat", leftExpr, rightExpr)
1616-
optExprWithInfo(optExpr, expr, leftCast, rightCast)
1617-
1618-
case StringReplace(src, search, replace) =>
1619-
val srcCast = Cast(src, StringType)
1620-
val searchCast = Cast(search, StringType)
1621-
val replaceCast = Cast(replace, StringType)
1622-
val srcExpr = exprToProtoInternal(srcCast, inputs, binding)
1623-
val searchExpr = exprToProtoInternal(searchCast, inputs, binding)
1624-
val replaceExpr = exprToProtoInternal(replaceCast, inputs, binding)
1625-
val optExpr = scalarFunctionExprToProto("replace", srcExpr, searchExpr, replaceExpr)
1626-
optExprWithInfo(optExpr, expr, srcCast, searchCast, replaceCast)
1627-
1628-
case StringTranslate(src, matching, replace) =>
1629-
val srcCast = Cast(src, StringType)
1630-
val matchingCast = Cast(matching, StringType)
1631-
val replaceCast = Cast(replace, StringType)
1632-
val srcExpr = exprToProtoInternal(srcCast, inputs, binding)
1633-
val matchingExpr = exprToProtoInternal(matchingCast, inputs, binding)
1634-
val replaceExpr = exprToProtoInternal(replaceCast, inputs, binding)
1635-
val optExpr = scalarFunctionExprToProto("translate", srcExpr, matchingExpr, replaceExpr)
1636-
optExprWithInfo(optExpr, expr, srcCast, matchingCast, replaceCast)
1637-
1638-
case StringTrim(srcStr, trimStr) =>
1639-
trim(expr, srcStr, trimStr, inputs, binding, "trim")
1640-
1641-
case StringTrimLeft(srcStr, trimStr) =>
1642-
trim(expr, srcStr, trimStr, inputs, binding, "ltrim")
1643-
1644-
case StringTrimRight(srcStr, trimStr) =>
1645-
trim(expr, srcStr, trimStr, inputs, binding, "rtrim")
1646-
1647-
case StringTrimBoth(srcStr, trimStr, _) =>
1648-
trim(expr, srcStr, trimStr, inputs, binding, "btrim")
1649-
1650-
case Upper(child) =>
1651-
if (CometConf.COMET_CASE_CONVERSION_ENABLED.get()) {
1652-
val castExpr = Cast(child, StringType)
1653-
val childExpr = exprToProtoInternal(castExpr, inputs, binding)
1654-
val optExpr = scalarFunctionExprToProto("upper", childExpr)
1655-
optExprWithInfo(optExpr, expr, castExpr)
1656-
} else {
1657-
withInfo(
1658-
expr,
1659-
"Comet is not compatible with Spark for case conversion in " +
1660-
s"locale-specific cases. Set ${CometConf.COMET_CASE_CONVERSION_ENABLED.key}=true " +
1661-
"to enable it anyway.")
1662-
None
1663-
}
1572+
case _: StringInstr =>
1573+
CometStringInstr.convert(expr, inputs, binding)
16641574

1665-
case Lower(child) =>
1666-
if (CometConf.COMET_CASE_CONVERSION_ENABLED.get()) {
1667-
val castExpr = Cast(child, StringType)
1668-
val childExpr = exprToProtoInternal(castExpr, inputs, binding)
1669-
val optExpr = scalarFunctionExprToProto("lower", childExpr)
1670-
optExprWithInfo(optExpr, expr, castExpr)
1671-
} else {
1672-
withInfo(
1673-
expr,
1674-
"Comet is not compatible with Spark for case conversion in " +
1675-
s"locale-specific cases. Set ${CometConf.COMET_CASE_CONVERSION_ENABLED.key}=true " +
1676-
"to enable it anyway.")
1677-
None
1678-
}
1575+
case _: StringRepeat =>
1576+
CometStringRepeat.convert(expr, inputs, binding)
1577+
1578+
case _: StringReplace =>
1579+
CometStringReplace.convert(expr, inputs, binding)
1580+
1581+
case _: StringTranslate =>
1582+
CometStringTranslate.convert(expr, inputs, binding)
1583+
1584+
case _: StringTrim =>
1585+
CometTrim.convert(expr, inputs, binding)
1586+
1587+
case _: StringTrimLeft =>
1588+
CometStringTrimLeft.convert(expr, inputs, binding)
1589+
1590+
case _: StringTrimRight =>
1591+
CometStringTrimRight.convert(expr, inputs, binding)
1592+
1593+
case _: StringTrimBoth =>
1594+
CometStringTrimBoth.convert(expr, inputs, binding)
1595+
1596+
case _: Upper =>
1597+
CometUpper.convert(expr, inputs, binding)
1598+
1599+
case _: Lower =>
1600+
CometLower.convert(expr, inputs, binding)
16791601

16801602
case BitwiseAnd(left, right) =>
16811603
createBinaryExpr(
@@ -2166,26 +2088,6 @@ object QueryPlanSerde extends Logging with CometExprShim {
21662088
}
21672089
}
21682090

2169-
def trim(
2170-
expr: Expression, // parent expression
2171-
srcStr: Expression,
2172-
trimStr: Option[Expression],
2173-
inputs: Seq[Attribute],
2174-
binding: Boolean,
2175-
trimType: String): Option[Expr] = {
2176-
val srcCast = Cast(srcStr, StringType)
2177-
val srcExpr = exprToProtoInternal(srcCast, inputs, binding)
2178-
if (trimStr.isDefined) {
2179-
val trimCast = Cast(trimStr.get, StringType)
2180-
val trimExpr = exprToProtoInternal(trimCast, inputs, binding)
2181-
val optExpr = scalarFunctionExprToProto(trimType, srcExpr, trimExpr)
2182-
optExprWithInfo(optExpr, expr, srcCast, trimCast)
2183-
} else {
2184-
val optExpr = scalarFunctionExprToProto(trimType, srcExpr)
2185-
optExprWithInfo(optExpr, expr, srcCast)
2186-
}
2187-
}
2188-
21892091
def in(
21902092
expr: Expression,
21912093
value: Expression,

0 commit comments

Comments
 (0)