@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
 import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, NormalizeNaNAndZero}
 import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.catalyst.util.{CharVarcharCodegenUtils, GenericArrayData}
+import org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils
 import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns.getExistenceDefaultValues
 import org.apache.spark.sql.comet._
 import org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec
@@ -43,21 +43,17 @@ import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, HashJoin, Sh
 import org.apache.spark.sql.execution.window.WindowExec
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.UTF8String
-
-import com.google.protobuf.ByteString
 
 import org.apache.comet.{CometConf, ConfigEntry}
 import org.apache.comet.CometSparkSessionExtensions.{isCometScan, withInfo}
-import org.apache.comet.DataTypeSupport.isComplexType
 import org.apache.comet.expressions._
 import org.apache.comet.objectstore.NativeConfig
 import org.apache.comet.serde.ExprOuterClass.{AggExpr, Expr, ScalarFunc}
 import org.apache.comet.serde.OperatorOuterClass.{AggregateMode => CometAggregateMode, BuildSide, JoinType, Operator}
 import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto}
 import org.apache.comet.serde.Types.{DataType => ProtoDataType}
 import org.apache.comet.serde.Types.DataType._
-import org.apache.comet.serde.Types.ListLiteral
+import org.apache.comet.serde.literals.CometLiteral
 import org.apache.comet.shims.CometExprShim
 
 /**
@@ -213,7 +209,6 @@ object QueryPlanSerde extends Logging with CometExprShim {
     classOf[Cast] -> CometCast)
 
   private val miscExpressions: Map[Class[_ <: Expression], CometExpressionSerde[_]] = Map(
-    // TODO Literal
     // TODO SortOrder (?)
     // TODO PromotePrecision
     // TODO CheckOverflow
@@ -225,9 +220,10 @@ object QueryPlanSerde extends Logging with CometExprShim {
     // TODO RegExpReplace
     classOf[Alias] -> CometAlias,
     classOf[AttributeReference] -> CometAttributeReference,
-    classOf[SparkPartitionID] -> CometSparkPartitionId,
+    classOf[Coalesce] -> CometCoalesce,
+    classOf[Literal] -> CometLiteral,
     classOf[MonotonicallyIncreasingID] -> CometMonotonicallyIncreasingId,
-    classOf[Coalesce] -> CometCoalesce)
+    classOf[SparkPartitionID] -> CometSparkPartitionId)
 
   /**
    * Mapping of Spark expression class to Comet expression handler.
@@ -677,147 +673,6 @@ object QueryPlanSerde extends Logging with CometExprShim {
         val cast = Cast(child, expr.dataType, Some(timeZoneId), EvalMode.TRY)
         convert(cast, CometCast)
 
-      case Literal(value, dataType)
-          if supportedDataType(
-            dataType,
-            allowComplex = value == null ||
-              // Nested literal support for native reader
-              // can be tracked https://github.com/apache/datafusion-comet/issues/1937
-              // now supports only Array of primitive
-              (Seq(CometConf.SCAN_NATIVE_ICEBERG_COMPAT, CometConf.SCAN_NATIVE_DATAFUSION)
-                .contains(CometConf.COMET_NATIVE_SCAN_IMPL.get()) && dataType
-                .isInstanceOf[ArrayType]) && !isComplexType(
-                dataType.asInstanceOf[ArrayType].elementType)) =>
-        val exprBuilder = LiteralOuterClass.Literal.newBuilder()
-
-        if (value == null) {
-          exprBuilder.setIsNull(true)
-        } else {
-          exprBuilder.setIsNull(false)
-          dataType match {
-            case _: BooleanType => exprBuilder.setBoolVal(value.asInstanceOf[Boolean])
-            case _: ByteType => exprBuilder.setByteVal(value.asInstanceOf[Byte])
-            case _: ShortType => exprBuilder.setShortVal(value.asInstanceOf[Short])
-            case _: IntegerType | _: DateType => exprBuilder.setIntVal(value.asInstanceOf[Int])
-            case _: LongType | _: TimestampType | _: TimestampNTZType =>
-              exprBuilder.setLongVal(value.asInstanceOf[Long])
-            case _: FloatType => exprBuilder.setFloatVal(value.asInstanceOf[Float])
-            case _: DoubleType => exprBuilder.setDoubleVal(value.asInstanceOf[Double])
-            case _: StringType =>
-              exprBuilder.setStringVal(value.asInstanceOf[UTF8String].toString)
-            case _: DecimalType =>
-              // Pass decimal literal as bytes.
-              val unscaled = value.asInstanceOf[Decimal].toBigDecimal.underlying.unscaledValue
-              exprBuilder.setDecimalVal(
-                com.google.protobuf.ByteString.copyFrom(unscaled.toByteArray))
-            case _: BinaryType =>
-              val byteStr =
-                com.google.protobuf.ByteString.copyFrom(value.asInstanceOf[Array[Byte]])
-              exprBuilder.setBytesVal(byteStr)
-            case a: ArrayType =>
-              val listLiteralBuilder = ListLiteral.newBuilder()
-              val array = value.asInstanceOf[GenericArrayData].array
-              a.elementType match {
-                case NullType =>
-                  array.foreach(_ => listLiteralBuilder.addNullMask(true))
-                case BooleanType =>
-                  array.foreach(v => {
-                    val casted = v.asInstanceOf[java.lang.Boolean]
-                    listLiteralBuilder.addBooleanValues(casted)
-                    listLiteralBuilder.addNullMask(casted != null)
-                  })
-                case ByteType =>
-                  array.foreach(v => {
-                    val casted = v.asInstanceOf[java.lang.Integer]
-                    listLiteralBuilder.addByteValues(casted)
-                    listLiteralBuilder.addNullMask(casted != null)
-                  })
-                case ShortType =>
-                  array.foreach(v => {
-                    val casted = v.asInstanceOf[java.lang.Short]
-                    listLiteralBuilder.addShortValues(
-                      if (casted != null) casted.intValue()
-                      else null.asInstanceOf[java.lang.Integer])
-                    listLiteralBuilder.addNullMask(casted != null)
-                  })
-                case IntegerType | DateType =>
-                  array.foreach(v => {
-                    val casted = v.asInstanceOf[java.lang.Integer]
-                    listLiteralBuilder.addIntValues(casted)
-                    listLiteralBuilder.addNullMask(casted != null)
-                  })
-                case LongType | TimestampType | TimestampNTZType =>
-                  array.foreach(v => {
-                    val casted = v.asInstanceOf[java.lang.Long]
-                    listLiteralBuilder.addLongValues(casted)
-                    listLiteralBuilder.addNullMask(casted != null)
-                  })
-                case FloatType =>
-                  array.foreach(v => {
-                    val casted = v.asInstanceOf[java.lang.Float]
-                    listLiteralBuilder.addFloatValues(casted)
-                    listLiteralBuilder.addNullMask(casted != null)
-                  })
-                case DoubleType =>
-                  array.foreach(v => {
-                    val casted = v.asInstanceOf[java.lang.Double]
-                    listLiteralBuilder.addDoubleValues(casted)
-                    listLiteralBuilder.addNullMask(casted != null)
-                  })
-                case StringType =>
-                  array.foreach(v => {
-                    val casted = v.asInstanceOf[org.apache.spark.unsafe.types.UTF8String]
-                    listLiteralBuilder.addStringValues(
-                      if (casted != null) casted.toString else "")
-                    listLiteralBuilder.addNullMask(casted != null)
-                  })
-                case _: DecimalType =>
-                  array
-                    .foreach(v => {
-                      val casted =
-                        v.asInstanceOf[Decimal]
-                      listLiteralBuilder.addDecimalValues(if (casted != null) {
-                        com.google.protobuf.ByteString
-                          .copyFrom(casted.toBigDecimal.underlying.unscaledValue.toByteArray)
-                      } else ByteString.EMPTY)
-                      listLiteralBuilder.addNullMask(casted != null)
-                    })
-                case _: BinaryType =>
-                  array
-                    .foreach(v => {
-                      val casted =
-                        v.asInstanceOf[Array[Byte]]
-                      listLiteralBuilder.addBytesValues(if (casted != null) {
-                        com.google.protobuf.ByteString.copyFrom(casted)
-                      } else ByteString.EMPTY)
-                      listLiteralBuilder.addNullMask(casted != null)
-                    })
-              }
-              exprBuilder.setListVal(listLiteralBuilder.build())
-              exprBuilder.setDatatype(serializeDataType(dataType).get)
-            case dt =>
-              logWarning(s"Unexpected datatype '$dt' for literal value '$value'")
-          }
-        }
-
-        val dt = serializeDataType(dataType)
-
-        if (dt.isDefined) {
-          exprBuilder.setDatatype(dt.get)
-
-          Some(
-            ExprOuterClass.Expr
-              .newBuilder()
-              .setLiteral(exprBuilder)
-              .build())
-        } else {
-          withInfo(expr, s"Unsupported datatype $dataType")
-          None
-        }
-      case Literal(_, dataType) if !supportedDataType(dataType) =>
-        withInfo(expr, s"Unsupported datatype $dataType")
-        None
-
       // ToPrettyString is new in Spark 3.5
       case _
           if expr.getClass.getSimpleName == "ToPrettyString" && expr
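
Note on the refactor above: the inline `case Literal(...)` branch is replaced by the `classOf[Literal] -> CometLiteral` entry in `miscExpressions`, so literal serialization now goes through the same per-expression handler dispatch as `Alias`, `Coalesce`, and the other registered expressions. As a rough sketch of the handler shape, the scalar encoding from the deleted block could translate to something like the following. This is an illustration only: the object name `CometLiteralSketch` and the `convert` signature are assumptions, and helpers such as `serializeDataType` and `withInfo` are assumed to be importable as shown; the real implementation lives in `org.apache.comet.serde.literals.CometLiteral` and may differ.

import org.apache.spark.sql.catalyst.expressions.{Attribute, Literal}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String

import org.apache.comet.CometSparkSessionExtensions.withInfo
import org.apache.comet.serde.{ExprOuterClass, LiteralOuterClass}
import org.apache.comet.serde.QueryPlanSerde.serializeDataType

// Sketch only: the real CometLiteral may differ in name and signature.
// Scalar cases mirror the inline block removed in the diff above.
object CometLiteralSketch extends CometExpressionSerde[Literal] {

  override def convert(
      expr: Literal,
      inputs: Seq[Attribute],
      binding: Boolean): Option[ExprOuterClass.Expr] = {
    val builder = LiteralOuterClass.Literal.newBuilder()
    if (expr.value == null) {
      builder.setIsNull(true)
    } else {
      builder.setIsNull(false)
      expr.dataType match {
        case _: BooleanType => builder.setBoolVal(expr.value.asInstanceOf[Boolean])
        case _: IntegerType | _: DateType => builder.setIntVal(expr.value.asInstanceOf[Int])
        case _: LongType | _: TimestampType =>
          builder.setLongVal(expr.value.asInstanceOf[Long])
        case _: StringType =>
          builder.setStringVal(expr.value.asInstanceOf[UTF8String].toString)
        case dt =>
          // Decimal, binary, and array literals would be handled here as well.
          withInfo(expr, s"Unsupported datatype $dt")
          return None
      }
    }
    serializeDataType(expr.dataType) match {
      case Some(protoType) =>
        builder.setDatatype(protoType)
        Some(ExprOuterClass.Expr.newBuilder().setLiteral(builder).build())
      case None =>
        withInfo(expr, s"Unsupported datatype ${expr.dataType}")
        None
    }
  }
}

Keeping each expression behind a small handler object like this lets QueryPlanSerde stay a thin dispatch table rather than one very large pattern match, which is the pattern the miscExpressions map change follows.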