Skip to content

Commit 62b3c91

Browse files
authored
fix: split expr.proto file (new) (#2267)
* split expr.proto file
1 parent 0dc71c0 commit 62b3c91

File tree

7 files changed

+119
-87
lines changed

7 files changed

+119
-87
lines changed

native/proto/src/proto/expr.proto

Lines changed: 1 addition & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ syntax = "proto3";
2121

2222
package spark.spark_expression;
2323

24+
import "literal.proto";
2425
import "types.proto";
2526

2627
option java_package = "org.apache.comet.serde";
@@ -203,27 +204,6 @@ message BloomFilterAgg {
203204
DataType datatype = 4;
204205
}
205206

206-
message Literal {
207-
oneof value {
208-
bool bool_val = 1;
209-
// Protobuf doesn't provide int8 and int16, we put them into int32 and convert
210-
// to int8 and int16 when deserializing.
211-
int32 byte_val = 2;
212-
int32 short_val = 3;
213-
int32 int_val = 4;
214-
int64 long_val = 5;
215-
float float_val = 6;
216-
double double_val = 7;
217-
string string_val = 8;
218-
bytes bytes_val = 9;
219-
bytes decimal_val = 10;
220-
ListLiteral list_val = 11;
221-
}
222-
223-
DataType datatype = 12;
224-
bool is_null = 13;
225-
}
226-
227207
enum EvalMode {
228208
LEGACY = 0;
229209
TRY = 1;
@@ -426,59 +406,3 @@ message ArrayJoin {
426406
message Rand {
427407
int64 seed = 1;
428408
}
429-
430-
message DataType {
431-
enum DataTypeId {
432-
BOOL = 0;
433-
INT8 = 1;
434-
INT16 = 2;
435-
INT32 = 3;
436-
INT64 = 4;
437-
FLOAT = 5;
438-
DOUBLE = 6;
439-
STRING = 7;
440-
BYTES = 8;
441-
TIMESTAMP = 9;
442-
DECIMAL = 10;
443-
TIMESTAMP_NTZ = 11;
444-
DATE = 12;
445-
NULL = 13;
446-
LIST = 14;
447-
MAP = 15;
448-
STRUCT = 16;
449-
}
450-
DataTypeId type_id = 1;
451-
452-
message DataTypeInfo {
453-
oneof datatype_struct {
454-
DecimalInfo decimal = 2;
455-
ListInfo list = 3;
456-
MapInfo map = 4;
457-
StructInfo struct = 5;
458-
}
459-
}
460-
461-
message DecimalInfo {
462-
int32 precision = 1;
463-
int32 scale = 2;
464-
}
465-
466-
message ListInfo {
467-
DataType element_type = 1;
468-
bool contains_null = 2;
469-
}
470-
471-
message MapInfo {
472-
DataType key_type = 1;
473-
DataType value_type = 2;
474-
bool value_contains_null = 3;
475-
}
476-
477-
message StructInfo {
478-
repeated string field_names = 1;
479-
repeated DataType field_datatypes = 2;
480-
repeated bool field_nullable = 3;
481-
}
482-
483-
DataTypeInfo type_info = 2;
484-
}

native/proto/src/proto/literal.proto

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
19+
20+
syntax = "proto3";
21+
22+
package spark.spark_expression;
23+
24+
import "types.proto";
25+
26+
option java_package = "org.apache.comet.serde";
27+
28+
message Literal {
29+
oneof value {
30+
bool bool_val = 1;
31+
// Protobuf doesn't provide int8 and int16, we put them into int32 and convert
32+
// to int8 and int16 when deserializing.
33+
int32 byte_val = 2;
34+
int32 short_val = 3;
35+
int32 int_val = 4;
36+
int64 long_val = 5;
37+
float float_val = 6;
38+
double double_val = 7;
39+
string string_val = 8;
40+
bytes bytes_val = 9;
41+
bytes decimal_val = 10;
42+
ListLiteral list_val = 11;
43+
}
44+
45+
DataType datatype = 12;
46+
bool is_null = 13;
47+
}

native/proto/src/proto/operator.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ package spark.spark_operator;
2323

2424
import "expr.proto";
2525
import "partitioning.proto";
26+
import "types.proto";
2627

2728
option java_package = "org.apache.comet.serde";
2829

native/proto/src/proto/types.proto

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,60 @@ message ListLiteral {
3838
repeated ListLiteral list_values = 11;
3939

4040
repeated bool null_mask = 12;
41-
}
41+
}
42+
43+
message DataType {
44+
enum DataTypeId {
45+
BOOL = 0;
46+
INT8 = 1;
47+
INT16 = 2;
48+
INT32 = 3;
49+
INT64 = 4;
50+
FLOAT = 5;
51+
DOUBLE = 6;
52+
STRING = 7;
53+
BYTES = 8;
54+
TIMESTAMP = 9;
55+
DECIMAL = 10;
56+
TIMESTAMP_NTZ = 11;
57+
DATE = 12;
58+
NULL = 13;
59+
LIST = 14;
60+
MAP = 15;
61+
STRUCT = 16;
62+
}
63+
DataTypeId type_id = 1;
64+
65+
message DataTypeInfo {
66+
oneof datatype_struct {
67+
DecimalInfo decimal = 2;
68+
ListInfo list = 3;
69+
MapInfo map = 4;
70+
StructInfo struct = 5;
71+
}
72+
}
73+
74+
message DecimalInfo {
75+
int32 precision = 1;
76+
int32 scale = 2;
77+
}
78+
79+
message ListInfo {
80+
DataType element_type = 1;
81+
bool contains_null = 2;
82+
}
83+
84+
message MapInfo {
85+
DataType key_type = 1;
86+
DataType value_type = 2;
87+
bool value_contains_null = 3;
88+
}
89+
90+
message StructInfo {
91+
repeated string field_names = 1;
92+
repeated DataType field_datatypes = 2;
93+
repeated bool field_nullable = 3;
94+
}
95+
96+
DataTypeInfo type_info = 2;
97+
}

spark/src/main/scala/org/apache/comet/parquet/SourceFilterSerde.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,14 @@ import org.apache.spark.sql.types._
2929

3030
import org.apache.comet.serde.ExprOuterClass
3131
import org.apache.comet.serde.ExprOuterClass.Expr
32+
import org.apache.comet.serde.LiteralOuterClass
3233
import org.apache.comet.serde.QueryPlanSerde.serializeDataType
3334

3435
object SourceFilterSerde extends Logging {
3536

3637
def createNameExpr(
3738
name: String,
38-
schema: StructType): Option[(DataType, ExprOuterClass.Expr)] = {
39+
schema: StructType): Option[(org.apache.spark.sql.types.DataType, ExprOuterClass.Expr)] = {
3940
val filedWithIndex = schema.fields.zipWithIndex.find { case (field, _) =>
4041
field.name == name
4142
}
@@ -66,8 +67,10 @@ object SourceFilterSerde extends Logging {
6667
/**
6768
* create a literal value native expression for source filter value, the value is a scala value
6869
*/
69-
def createValueExpr(value: Any, dataType: DataType): Option[ExprOuterClass.Expr] = {
70-
val exprBuilder = ExprOuterClass.Literal.newBuilder()
70+
def createValueExpr(
71+
value: Any,
72+
dataType: org.apache.spark.sql.types.DataType): Option[ExprOuterClass.Expr] = {
73+
val exprBuilder = LiteralOuterClass.Literal.newBuilder()
7174
var valueIsSet = true
7275
if (value == null) {
7376
exprBuilder.setIsNull(true)

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,11 @@ import org.apache.comet.CometSparkSessionExtensions.{isCometScan, withInfo}
5252
import org.apache.comet.DataTypeSupport.isComplexType
5353
import org.apache.comet.expressions._
5454
import org.apache.comet.objectstore.NativeConfig
55-
import org.apache.comet.serde.ExprOuterClass.{AggExpr, DataType => ProtoDataType, Expr, ScalarFunc}
56-
import org.apache.comet.serde.ExprOuterClass.DataType._
55+
import org.apache.comet.serde.ExprOuterClass.{AggExpr, Expr, ScalarFunc}
5756
import org.apache.comet.serde.OperatorOuterClass.{AggregateMode => CometAggregateMode, BuildSide, JoinType, Operator}
5857
import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto}
58+
import org.apache.comet.serde.Types.{DataType => ProtoDataType}
59+
import org.apache.comet.serde.Types.DataType._
5960
import org.apache.comet.serde.Types.ListLiteral
6061
import org.apache.comet.shims.CometExprShim
6162

@@ -228,7 +229,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
228229
* doesn't mean it is supported by Comet native execution, i.e., `supportedDataType` may return
229230
* false for it.
230231
*/
231-
def serializeDataType(dt: DataType): Option[ExprOuterClass.DataType] = {
232+
def serializeDataType(dt: org.apache.spark.sql.types.DataType): Option[Types.DataType] = {
232233
val typeId = dt match {
233234
case _: BooleanType => 0
234235
case _: ByteType => 1
@@ -762,7 +763,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
762763
.contains(CometConf.COMET_NATIVE_SCAN_IMPL.get()) && dataType
763764
.isInstanceOf[ArrayType]) && !isComplexType(
764765
dataType.asInstanceOf[ArrayType].elementType)) =>
765-
val exprBuilder = ExprOuterClass.Literal.newBuilder()
766+
val exprBuilder = LiteralOuterClass.Literal.newBuilder()
766767

767768
if (value == null) {
768769
exprBuilder.setIsNull(true)

spark/src/main/scala/org/apache/comet/serde/hash.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ object CometXxHash64 extends CometExpressionSerde[XxHash64] {
3434
return None
3535
}
3636
val exprs = expr.children.map(exprToProtoInternal(_, inputs, binding))
37-
val seedBuilder = ExprOuterClass.Literal
37+
val seedBuilder = LiteralOuterClass.Literal
3838
.newBuilder()
3939
.setDatatype(serializeDataType(LongType).get)
4040
.setLongVal(expr.seed)
@@ -53,7 +53,7 @@ object CometMurmur3Hash extends CometExpressionSerde[Murmur3Hash] {
5353
return None
5454
}
5555
val exprs = expr.children.map(exprToProtoInternal(_, inputs, binding))
56-
val seedBuilder = ExprOuterClass.Literal
56+
val seedBuilder = LiteralOuterClass.Literal
5757
.newBuilder()
5858
.setDatatype(serializeDataType(IntegerType).get)
5959
.setIntVal(expr.seed)

0 commit comments

Comments (0)