Skip to content

Commit 3b0e2f7

Browse files
shirly121, BingqingLyu, zhanglei1949
authored
fix(interactive): Align Data Type in GIE Physical Pb with Flex (#4367)
<!-- Thanks for your contribution! Please review https://github.com/alibaba/GraphScope/blob/main/CONTRIBUTING.md before opening an issue. -->

## What do these changes do?

1. Support the Flex type system in the GIE physical [proto](https://github.com/shirly121/GraphScope/blob/ir_align_type/interactive_engine/executor/ir/proto/basic_type.proto).
2. Support type conversion among the different type systems. There are currently three type systems:
   a. Groot: defined in [proto](https://github.com/shirly121/GraphScope/blob/ir_align_type/proto/schema_common.proto)
   b. Flex: defined in [proto](https://github.com/shirly121/GraphScope/blob/ir_align_type/interactive_engine/executor/ir/proto/basic_type.proto)
   c. Calcite: defined in [java](https://github.com/shirly121/GraphScope/blob/ir_align_type/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/type/GraphTypeFactoryImpl.java)

The Calcite type system serves as the foundation for type inference in the compiler. Consequently, any other type system (e.g., Groot or Flex) must be convertible to Calcite. The [IrDataTypeConvertor](https://github.com/alibaba/GraphScope/pull/4367/files#diff-6844283ba782602bd37d9f17f56646af7188f4ba849abc5ac8d1f404ce122e22) plays a key role in this process. It defines the bidirectional type conversions between Calcite and the other two type systems.

<!-- Please give a short brief about these changes. -->

## Related issue number

<!-- Are there any issues opened that will be resolved by merging this change? -->

Fixes #4362

---------

Co-authored-by: BingqingLyu <[email protected]>
Co-authored-by: xiaolei.zl <[email protected]>
1 parent bbc7786 commit 3b0e2f7

File tree

115 files changed

+3936
-3888
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

115 files changed

+3936
-3888
lines changed

.github/workflows/interactive.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -212,12 +212,9 @@ jobs:
212212
GLOG_v=10 ./bin/bulk_loader -g ${SCHEMA_FILE} -l ${BULK_LOAD_FILE} -d ${TMP_INTERACTIVE_WORKSPACE}/data/modern_graph/indices/
213213
cd ${GITHUB_WORKSPACE}/flex/tests/hqps
214214
sed -i 's/interactive_workspace/temp_workspace/g' ./interactive_config_test.yaml
215-
# set thread_num_per_worker to 4
216-
sed -i 's/thread_num_per_worker: 1/thread_num_per_worker: 4/g' ./interactive_config_test.yaml
217215
bash hqps_sdk_test.sh ${TMP_INTERACTIVE_WORKSPACE} ./interactive_config_test.yaml java
218216
bash hqps_sdk_test.sh ${TMP_INTERACTIVE_WORKSPACE} ./interactive_config_test.yaml python
219217
sed -i 's/temp_workspace/interactive_workspace/g' ./interactive_config_test.yaml
220-
sed -i 's/thread_num_per_worker: 4/thread_num_per_worker: 1/g' ./interactive_config_test.yaml
221218
222219
- name: Robustness test
223220
env:

flex/codegen/src/codegen_utils.h

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,23 @@ std::string generate_output_list(std::string input_name, int32_t input_size,
132132
// check type consistent
133133
bool data_type_consistent(const common::DataType& left,
134134
const common::DataType& right) {
135-
if (left == common::DataType::NONE || right == common::DataType::NONE) {
136-
return true;
135+
if (left.item_case() == common::DataType::ITEM_NOT_SET) {
136+
return false;
137+
}
138+
if (left.item_case() != right.item_case()) {
139+
return false;
140+
}
141+
if (left.item_case() == common::DataType::kPrimitiveType) {
142+
return left.primitive_type() == right.primitive_type();
143+
} else if (left.item_case() == common::DataType::kArray ||
144+
left.item_case() == common::DataType::kMap) {
145+
LOG(FATAL) << "Not support list or map type";
146+
} else if (left.item_case() == common::DataType::kString) {
147+
return true; // string type is always consistent
148+
} else {
149+
LOG(FATAL) << "Unexpected data type";
150+
return false;
137151
}
138-
return left == right;
139152
}
140153

141154
std::tuple<std::string, std::string> decode_param_from_decoder(

flex/codegen/src/graph_types.h

Lines changed: 88 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ limitations under the License.
1919
#include <type_traits>
2020

2121
#include "flex/codegen/src/string_utils.h"
22+
#include "flex/proto_generated_gie/basic_type.pb.h"
2223
#include "flex/proto_generated_gie/common.pb.h"
2324
#include "glog/logging.h"
2425
#include "google/protobuf/any.h"
@@ -63,62 +64,109 @@ inline bool operator==(const ParamConst& lhs, const ParamConst& rhs) {
6364

6465
} // namespace codegen
6566

66-
static codegen::DataType common_data_type_pb_2_data_type(
67-
const common::DataType& data_type) {
68-
switch (data_type) {
69-
case common::DataType::INT32:
67+
static codegen::DataType primitive_type_to_data_type(
68+
const common::PrimitiveType& type) {
69+
switch (type) {
70+
case common::PrimitiveType::DT_SIGNED_INT32:
7071
return codegen::DataType::kInt32;
71-
case common::DataType::INT64:
72+
case common::PrimitiveType::DT_SIGNED_INT64:
7273
return codegen::DataType::kInt64;
73-
case common::DataType::DOUBLE:
74+
case common::PrimitiveType::DT_FLOAT:
75+
return codegen::DataType::kFloat;
76+
case common::PrimitiveType::DT_DOUBLE:
7477
return codegen::DataType::kDouble;
75-
case common::DataType::STRING:
76-
return codegen::DataType::kString;
77-
case common::DataType::INT64_ARRAY:
78-
return codegen::DataType::kInt64Array;
79-
case common::DataType::INT32_ARRAY:
80-
return codegen::DataType::kInt32Array;
81-
case common::DataType::BOOLEAN:
78+
case common::PrimitiveType::DT_BOOL:
8279
return codegen::DataType::kBoolean;
83-
case common::DataType::DATE32:
80+
default:
81+
// LOG(FATAL) << "unknown primitive type";
82+
throw std::runtime_error(
83+
"unknown primitive type when converting primitive type to data type:" +
84+
std::to_string(static_cast<int>(type)));
85+
}
86+
}
87+
88+
static codegen::DataType temporal_type_to_data_type(
89+
const common::Temporal& type) {
90+
switch (type.item_case()) {
91+
case common::Temporal::ItemCase::kDate:
8492
return codegen::DataType::kDate;
85-
case common::DataType::TIME32:
93+
case common::Temporal::ItemCase::kTime:
8694
return codegen::DataType::kTime;
87-
case common::DataType::TIMESTAMP:
95+
case common::Temporal::kTimestamp:
8896
return codegen::DataType::kTimeStamp;
97+
default:
98+
throw std::runtime_error(
99+
"unknown temporal type when converting temporal type to data type:" +
100+
std::to_string(static_cast<int>(type.item_case())));
101+
}
102+
}
103+
104+
static codegen::DataType common_data_type_pb_2_data_type(
105+
const common::DataType& data_type) {
106+
switch (data_type.item_case()) {
107+
case common::DataType::ItemCase::kPrimitiveType:
108+
return primitive_type_to_data_type(data_type.primitive_type());
109+
case common::DataType::ItemCase::kDecimal:
110+
LOG(FATAL) << "Not support decimal type";
111+
case common::DataType::ItemCase::kString:
112+
return codegen::DataType::kString;
113+
case common::DataType::ItemCase::kTemporal:
114+
return temporal_type_to_data_type(data_type.temporal());
115+
case common::DataType::ItemCase::kArray:
116+
case common::DataType::ItemCase::kMap:
117+
LOG(FATAL) << "Not support array or map type";
89118
default:
90119
// LOG(FATAL) << "unknown data type";
91120
throw std::runtime_error(
92121
"unknown data type when converting common_data_type to inner data "
93122
"type:" +
94-
std::to_string(static_cast<int>(data_type)));
123+
data_type.DebugString());
95124
}
96125
}
97126

98-
static std::string single_common_data_type_pb_2_str(
99-
const common::DataType& data_type) {
100-
switch (data_type) {
101-
case common::DataType::BOOLEAN:
102-
return "bool";
103-
case common::DataType::INT32:
127+
static std::string primitive_type_to_str(const common::PrimitiveType& type) {
128+
switch (type) {
129+
case common::PrimitiveType::DT_SIGNED_INT32:
104130
return "int32_t";
105-
case common::DataType::INT64:
131+
case common::PrimitiveType::DT_UNSIGNED_INT32:
132+
return "uint32_t";
133+
case common::PrimitiveType::DT_SIGNED_INT64:
106134
return "int64_t";
107-
case common::DataType::DOUBLE:
135+
case common::PrimitiveType::DT_UNSIGNED_INT64:
136+
return "uint64_t";
137+
case common::PrimitiveType::DT_FLOAT:
138+
return "float";
139+
case common::PrimitiveType::DT_DOUBLE:
108140
return "double";
109-
case common::DataType::STRING:
141+
case common::PrimitiveType::DT_BOOL:
142+
return "bool";
143+
default:
144+
// LOG(FATAL) << "unknown primitive type";
145+
throw std::runtime_error(
146+
"unknown primitive type when converting primitive type to string:" +
147+
std::to_string(static_cast<int>(type)));
148+
}
149+
}
150+
151+
static std::string single_common_data_type_pb_2_str(
152+
const common::DataType& data_type) {
153+
switch (data_type.item_case()) {
154+
case common::DataType::ItemCase::kPrimitiveType:
155+
return primitive_type_to_str(data_type.primitive_type());
156+
case common::DataType::ItemCase::kDecimal:
157+
LOG(FATAL) << "Not support decimal type";
158+
case common::DataType::ItemCase::kString:
110159
return "std::string_view";
111-
case common::DataType::INT64_ARRAY:
112-
return "std::vector<int64_t>";
113-
case common::DataType::INT32_ARRAY:
114-
return "std::vector<int32_t>";
115-
case common::DataType::DATE32:
116-
return "Date";
160+
case common::DataType::ItemCase::kTemporal:
161+
LOG(FATAL) << "Not support temporal type";
162+
case common::DataType::ItemCase::kArray:
163+
case common::DataType::ItemCase::kMap:
164+
LOG(FATAL) << "Not support array or map type";
117165
// TODO: support time32 and timestamp
118166
default:
119167
throw std::runtime_error(
120168
"unknown data type when convert common data type to string:" +
121-
std::to_string(static_cast<int>(data_type)));
169+
data_type.DebugString());
122170
}
123171
}
124172

@@ -266,21 +314,22 @@ static std::string data_type_2_rust_string(const codegen::DataType& data_type) {
266314
}
267315

268316
static common::DataType common_value_2_data_type(const common::Value& value) {
317+
common::DataType ret;
269318
switch (value.item_case()) {
270319
case common::Value::kI32:
271-
return common::DataType::INT32;
320+
ret.set_primitive_type(common::PrimitiveType::DT_SIGNED_INT32);
272321
case common::Value::kI64:
273-
return common::DataType::INT64;
322+
ret.set_primitive_type(common::PrimitiveType::DT_SIGNED_INT64);
274323
case common::Value::kBoolean:
275-
return common::DataType::BOOLEAN;
324+
ret.set_primitive_type(common::PrimitiveType::DT_BOOL);
276325
case common::Value::kF64:
277-
return common::DataType::DOUBLE;
326+
ret.set_primitive_type(common::PrimitiveType::DT_DOUBLE);
278327
case common::Value::kStr:
279-
return common::DataType::STRING;
328+
ret.mutable_string()->mutable_long_text();
280329
default:
281330
LOG(FATAL) << "unknown value" << value.DebugString();
282331
}
283-
return common::DataType::NONE;
332+
return ret;
284333
}
285334

286335
static void parse_param_const_from_pb(

flex/codegen/src/hqps/hqps_case_when_builder.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,8 @@ class CaseWhenBuilder : public ExprBuilder {
118118

119119
std::string str = formater.str();
120120

121-
return std::make_tuple(
122-
class_name_, construct_params_, tag_selectors_, str,
123-
std::vector{common::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_});
121+
return std::make_tuple(class_name_, construct_params_, tag_selectors_, str,
122+
std::vector{common::DataType()});
124123
}
125124

126125
protected:

flex/codegen/src/hqps/hqps_edge_expand_builder.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,9 @@ static void BuildExprFromPredicate(BuildingContext& ctx,
224224
std::string& func_construct_params_str,
225225
std::string& property_selectors_str) {
226226
auto expr_builder = ExprBuilder(ctx);
227-
expr_builder.set_return_type(common::DataType::BOOLEAN);
227+
common::DataType type;
228+
type.set_primitive_type(common::PrimitiveType::DT_BOOL);
229+
expr_builder.set_return_type(type);
228230
expr_builder.AddAllExprOpr(expr.operators());
229231
std::string expr_code;
230232
std::vector<codegen::ParamConst> func_call_param_const;

flex/codegen/src/hqps/hqps_get_v_builder.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,9 @@ class GetVOpBuilder {
135135

136136
auto& expr_oprs = expr.operators();
137137
expr_builder.AddAllExprOpr(expr_oprs);
138-
expr_builder.set_return_type(common::DataType::BOOLEAN);
138+
common::DataType data_type;
139+
data_type.set_primitive_type(common::PrimitiveType::DT_BOOL);
140+
expr_builder.set_return_type(data_type);
139141
std::vector<common::DataType> unused_expr_ret_type;
140142
if (!expr_builder.empty()) {
141143
std::tie(expr_name_, expr_call_param_, tag_properties_, expr_code_,

flex/codegen/src/hqps/hqps_scan_builder.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,9 @@ class ScanOpBuilder {
149149

150150
// TODO: make expr_builder a member of ScanOpBuilder
151151
// auto expr_builder = ExprBuilder(ctx_);
152-
expr_builder_.set_return_type(common::DataType::BOOLEAN);
152+
common::DataType type;
153+
type.set_primitive_type(common::PrimitiveType::DT_BOOL);
154+
expr_builder_.set_return_type(common::DataType(type));
153155
// Add extra (, ) to wrap the code, since we may append index_predicate
154156
// afterwards.
155157
common::ExprOpr left_brace, right_brace;

flex/codegen/src/hqps/hqps_select_builder.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ class SelectOpBuilder {
4141

4242
SelectOpBuilder& expr(const common::Expression expr) {
4343
ExprBuilder expr_builder(ctx_);
44-
expr_builder.set_return_type(common::DataType::BOOLEAN);
44+
common::DataType data_type;
45+
data_type.set_primitive_type(common::PrimitiveType::DT_BOOL);
46+
expr_builder.set_return_type(data_type);
4547
expr_builder.AddAllExprOpr(expr.operators());
4648

4749
std::string func_code;

flex/codegen/src/pegasus/pegasus_order_by_builder.h

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -72,20 +72,29 @@ class OrderByOpBuilder {
7272
ss << ".then(";
7373
}
7474
std::string cmp_type;
75-
switch (data_type) {
76-
case common::DataType::BOOLEAN:
77-
case common::DataType::INT32:
78-
case common::DataType::INT64:
79-
case common::DataType::STRING: {
80-
cmp_type = "cmp";
81-
break;
75+
switch (data_type.item_case()) {
76+
case common::DataType::kPrimitiveType: {
77+
switch (data_type.primitive_type()) {
78+
case common::PrimitiveType::DT_BOOL:
79+
case common::PrimitiveType::DT_SIGNED_INT32:
80+
case common::PrimitiveType::DT_SIGNED_INT64:
81+
cmp_type = "cmp";
82+
break;
83+
case common::PrimitiveType::DT_DOUBLE: {
84+
cmp_type = "partial_cmp";
85+
break;
86+
}
87+
default:
88+
LOG(FATAL) << "Unsupported type "
89+
<< static_cast<int32_t>(data_type.primitive_type());
90+
}
8291
}
83-
case common::DataType::DOUBLE: {
84-
cmp_type = "partial_cmp";
92+
case common::DataType::kString: {
93+
cmp_type = "cmp";
8594
break;
8695
}
8796
default:
88-
LOG(FATAL) << "Unsupported type " << data_type;
97+
LOG(FATAL) << "Unsupported type " << data_type.DebugString();
8998
}
9099
std::string reverse_str;
91100
if (ordering_pair_[i].order() == algebra::OrderBy_OrderingPair_Order::

flex/codegen/src/pegasus/pegasus_project_builder.h

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -176,14 +176,24 @@ class ProjectOpBuilder {
176176
ctx_.SetOutput(i, data_types);
177177
} else if (column_meta.type().type_case() ==
178178
common::IrDataType::kDataType) {
179-
switch (column_meta.type().data_type()) {
180-
case common::DataType::INT64: {
181-
std::vector<codegen::DataType> data_types;
182-
data_types.push_back(codegen::DataType::kInt64);
183-
ctx_.SetOutput(i, data_types);
184-
break;
179+
switch (column_meta.type().data_type().item_case()) {
180+
case common::DataType::kPrimitiveType: {
181+
auto data_type = column_meta.type().data_type().primitive_type();
182+
switch (data_type) {
183+
case common::PrimitiveType::DT_SIGNED_INT64: {
184+
std::vector<codegen::DataType> data_types;
185+
data_types.push_back(codegen::DataType::kInt64);
186+
ctx_.SetOutput(i, data_types);
187+
break;
188+
}
189+
default: {
190+
std::vector<codegen::DataType> data_types;
191+
data_types.push_back(codegen::DataType::kString);
192+
ctx_.SetOutput(i, data_types);
193+
}
194+
}
185195
}
186-
case common::DataType::STRING: {
196+
case common::DataType::kString: {
187197
std::vector<codegen::DataType> data_types;
188198
data_types.push_back(codegen::DataType::kString);
189199
ctx_.SetOutput(i, data_types);

0 commit comments

Comments
 (0)