Skip to content

Commit 6284e0f

Browse files
authored
introduce varchar type (#82)
1 parent dbf0072 commit 6284e0f

File tree

11 files changed

+270
-0
lines changed

11 files changed

+270
-0
lines changed

include/substrait-mlir/Dialect/Substrait/IR/SubstraitAttrs.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,18 @@ def Substrait_UUIDAttr
192192
let assemblyFormat = [{ `<` $value `>` }];
193193
}
194194

195+
// Attribute carrying a varchar literal: a string `value` together with the
// `VarCharType` that bounds its length. It implements `TypedAttrInterface`.
// The assembly form is `<value, length>`; the type is parsed and printed
// through the custom `VarCharTypeByLength` directive, so only the length
// appears in the textual form (e.g. `#substrait.var_char<"hello", 6>`).
// `genVerifyDecl = 1` requests a `verify` declaration whose definition must be
// provided in C++.
def Substrait_VarCharAttr
196+
: Substrait_Attr<"VarChar", "var_char", [TypedAttrInterface]> {
197+
let summary = "Substrait varchar type";
198+
let description = [{
199+
This type represents a substrait varchar attribute type, namely a unicode
200+
string of at most L characters.
201+
}];
202+
let parameters = (ins "StringAttr":$value, "VarCharType":$type);
203+
let assemblyFormat = [{ `<` $value `,` custom<VarCharTypeByLength>($type) `>` }];
204+
let genVerifyDecl = 1;
205+
}
206+
195207
def Substrait_VersionAttr : Substrait_Attr<"Version", "version"> {
196208
let summary = "Substrait version";
197209
let description = [{
@@ -258,6 +270,7 @@ def Substrait_SimpleAttributes {
258270
def Substrait_ParametrizedAttributes {
259271
list<Attr> attrs = [
260272
Substrait_FixedCharAttr, // FixedChar
273+
Substrait_VarCharAttr, // VarChar
261274
Substrait_DecimalAttr, // Decimal
262275
];
263276
}

include/substrait-mlir/Dialect/Substrait/IR/SubstraitTypes.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,16 @@ def Substrait_UUIDType : Substrait_Type<"UUID", "uuid"> {
104104
}];
105105
}
106106

107+
// Parametrized varchar type with a single `int32_t` parameter `length`, the
// maximum length of the strings it describes. The assembly form is `<length>`
// (e.g. `!substrait.var_char<6>`).
def Substrait_VarCharType : Substrait_Type<"VarChar", "var_char"> {
108+
let summary = "Substrait variable-length char type";
109+
let description = [{
110+
This type represents a substrait variable-length unicode string of at most L
111+
characters.
112+
}];
113+
let parameters = (ins "int32_t":$length);
114+
let assemblyFormat = [{ `<` $length `>` }];
115+
}
116+
107117
/// Currently supported simple types, listed in order of the Substrait specification.
108118
/// These correspond directly to the types in
109119
/// https://github.com/substrait-io/substrait/blob/main/proto/substrait/type.proto.
@@ -134,6 +144,7 @@ def Substrait_SimpleTypes {
134144
def Substrait_ParametrizedTypes {
135145
list<Type> types = [
136146
Substrait_FixedCharType, // FixedChar
147+
Substrait_VarCharType, // VarChar
137148
Substrait_DecimalType, // Decimal
138149
];
139150
}

lib/Dialect/Substrait/IR/Substrait.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,16 @@ LogicalResult mlir::substrait::IntervalDaySecondAttr::verify(
103103
return success();
104104
}
105105

106+
/// Verifies that the string `value` fits into the declared `type`.
///
/// Fails with a diagnostic obtained from `emitError` if the size of `value`
/// exceeds the length of `type`; succeeds otherwise.
///
/// NOTE(review): the size is taken from `StringAttr::size()`, which presumably
/// counts bytes rather than unicode code points -- confirm whether multi-byte
/// strings should be measured differently.
LogicalResult mlir::substrait::VarCharAttr::verify(
    llvm::function_ref<mlir::InFlightDiagnostic()> emitError, StringAttr value,
    VarCharType type) {
  // Compare in 64 bits: implicitly narrowing the `size_t` size to `int32_t`
  // could wrap around for very large strings and let them pass verification.
  const auto valueLength = static_cast<int64_t>(value.size());
  const int64_t maxLength = type.getLength();
  if (valueLength > maxLength)
    return emitError() << "value length must be at most " << type.getLength()
                       << " characters.";
  return success();
}
115+
106116
//===----------------------------------------------------------------------===//
107117
// Substrait types
108118
//===----------------------------------------------------------------------===//
@@ -288,6 +298,26 @@ void printCountAsAll(OpAsmPrinter &printer, Operation *op, IntegerAttr count) {
288298
printer << count.getValue();
289299
}
290300

301+
// Parses a VarCharType by extracting the length from the given parser. Assumes
302+
// the length is surrounded by `<` and `>` symbols, which are removed. On
303+
// success, assigns the parsed type to `type` and returns success.
304+
ParseResult parseVarCharTypeByLength(AsmParser &parser, VarCharType &type) {
305+
// remove `<` and `>` symbols
306+
int64_t result;
307+
if (parser.parseInteger(result))
308+
return failure();
309+
310+
type = VarCharType::get(parser.getContext(), result);
311+
312+
return success();
313+
}
314+
315+
// Prints the VarCharType by outputting its length to the given printer.
316+
void printVarCharTypeByLength(AsmPrinter &printer, VarCharType type) {
317+
// Normal integer.
318+
printer << type.getLength();
319+
}
320+
291321
ParseResult parseDecimalNumber(AsmParser &parser, DecimalType &type,
292322
IntegerAttr &value) {
293323
llvm::SMLoc loc = parser.getCurrentLocation();

lib/Target/SubstraitPB/Export.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,18 @@ SubstraitExporter::exportType(Location loc, mlir::Type mlirType) {
391391
return std::move(type);
392392
}
393393

394+
// Handle varchar.
395+
if (mlir::isa<VarCharType>(mlirType)) {
396+
// TODO(ingomueller): support other nullability modes.
397+
auto varCharType = std::make_unique<proto::Type::VarChar>();
398+
varCharType->set_length(mlir::cast<VarCharType>(mlirType).getLength());
399+
varCharType->set_nullability(
400+
Type_Nullability::Type_Nullability_NULLABILITY_REQUIRED);
401+
auto type = std::make_unique<proto::Type>();
402+
type->set_allocated_varchar(varCharType.release());
403+
return std::move(type);
404+
}
405+
394406
// Handle decimal.
395407
if (auto decimalType = llvm::dyn_cast<DecimalType>(mlirType)) {
396408
auto decimalTypeProto = std::make_unique<proto::Type::Decimal>();
@@ -1002,6 +1014,12 @@ SubstraitExporter::exportOperation(LiteralOp op) {
10021014
// `FixedCharType`.
10031015
} else if (auto fixedCharType = dyn_cast<FixedCharType>(literalType)) {
10041016
literal->set_fixed_char(mlir::cast<FixedCharAttr>(value).getValue().str());
1017+
// `VarCharType`.
1018+
} else if (auto varCharType = dyn_cast<VarCharType>(literalType)) {
1019+
auto varChar =
1020+
std::make_unique<::substrait::proto::Expression_Literal_VarChar>();
1021+
varChar->set_value(mlir::cast<VarCharAttr>(value).getValue().str());
1022+
literal->set_allocated_var_char(varChar.release());
10051023
} // `DecimalType`.
10061024
else if (auto decimalType = dyn_cast<DecimalType>(literalType)) {
10071025
auto decimal =

lib/Target/SubstraitPB/Import.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ static mlir::FailureOr<mlir::Type> importType(MLIRContext *context,
219219
return UUIDType::get(context);
220220
case proto::Type::kFixedChar:
221221
return FixedCharType::get(context, type.fixed_char().length());
222+
case proto::Type::kVarchar:
223+
return VarCharType::get(context, type.varchar().length());
222224
case proto::Type::kDecimal: {
223225
const proto::Type::Decimal &decimalType = type.decimal();
224226
return mlir::substrait::DecimalType::get(context, decimalType.precision(),
@@ -702,6 +704,14 @@ importLiteral(ImplicitLocOpBuilder builder,
702704
auto attr = FixedCharAttr::get(context, stringAttr, fixedCharType);
703705
return builder.create<LiteralOp>(attr);
704706
}
707+
// Imports a `var_char` literal: wraps the message's string value into a
// `StringAttr`, builds the matching `VarCharType`, and creates a `LiteralOp`
// holding the resulting `VarCharAttr`.
// NOTE(review): the length of the type is derived from the size of the value,
// not from a length carried by the message, so the declared maximum length of
// a literal is not preserved across an export/import round trip (a literal
// "hello" always imports as `var_char<5>`). Confirm whether the message
// provides a length that should be used here instead.
case Expression::Literal::LiteralTypeCase::kVarChar: {
708+
StringAttr stringAttr =
709+
StringAttr::get(context, message.var_char().value());
710+
VarCharType varCharType =
711+
VarCharType::get(context, message.var_char().value().size());
712+
auto attr = VarCharAttr::get(context, stringAttr, varCharType);
713+
return builder.create<LiteralOp>(attr);
714+
}
705715
case Expression::Literal::LiteralTypeCase::kDecimal: {
706716
APInt var(128, 0);
707717
llvm::LoadIntFromMemory(

test/Dialect/Substrait/literal.mlir

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,30 @@
3131

3232
// -----
3333

34+
// CHECK: substrait.plan version 0 : 42 : 1 {
35+
// CHECK-NEXT: relation
36+
// CHECK: %[[V0:.*]] = named_table
37+
// CHECK-NEXT: %[[V1:.*]] = project %[[V0]] : tuple<si1> -> tuple<si1, !substrait.var_char<6>> {
38+
// CHECK-NEXT: ^[[BB0:.*]](%[[ARG0:.*]]: tuple<si1>):
39+
// CHECK-NEXT: %[[V2:.*]] = literal #substrait.var_char<"hello", 6>
40+
// CHECK-NEXT: yield %[[V2]] : !substrait.var_char<6>
41+
// CHECK-NEXT: }
42+
// CHECK-NEXT: yield %[[V1]] : tuple<si1, !substrait.var_char<6>>
43+
44+
substrait.plan version 0 : 42 : 1 {
45+
relation {
46+
%0 = named_table @t1 as ["a"] : tuple<si1>
47+
%1 = project %0 : tuple<si1> -> tuple<si1, !substrait.var_char<6>> {
48+
^bb0(%arg : tuple<si1>):
49+
%var_char = literal #substrait.var_char<"hello", 6>
50+
yield %var_char : !substrait.var_char<6>
51+
}
52+
yield %1 : tuple<si1, !substrait.var_char<6>>
53+
}
54+
}
55+
56+
// -----
57+
3458
// CHECK: substrait.plan version 0 : 42 : 1 {
3559
// CHECK-NEXT: relation
3660
// CHECK: %[[V0:.*]] = named_table

test/Dialect/Substrait/types.mlir

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
11
// RUN: substrait-opt -split-input-file %s \
22
// RUN: | FileCheck %s
33

4+
// CHECK-LABEL: substrait.plan
5+
// CHECK: relation
6+
// CHECK: %[[V0:.*]] = named_table @t1 as ["a"] : tuple<!substrait.var_char<6>>
7+
// CHECK-NEXT: yield %[[V0]] : tuple<!substrait.var_char<6>>
8+
9+
substrait.plan version 0 : 42 : 1 {
10+
relation {
11+
%0 = named_table @t1 as ["a"] : tuple<!substrait.var_char<6>>
12+
yield %0 : tuple<!substrait.var_char<6>>
13+
}
14+
}
15+
16+
// -----
17+
418
// CHECK-LABEL: substrait.plan
519
// CHECK: relation
620
// CHECK: %[[V0:.*]] = named_table @t1 as ["a"] : tuple<!substrait.decimal<12, 2>>

test/Target/SubstraitPB/Export/literal.mlir

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,37 @@ substrait.plan version 0 : 42 : 1 {
3939

4040
// -----
4141

42+
// CHECK-LABEL: relations {
43+
// CHECK-NEXT: rel {
44+
// CHECK-NEXT: project {
45+
// CHECK-NEXT: common {
46+
// CHECK-NEXT: direct {
47+
// CHECK-NEXT: }
48+
// CHECK-NEXT: }
49+
// CHECK-NEXT: input {
50+
// CHECK-NEXT: read {
51+
// CHECK: expressions {
52+
// CHECK-NEXT: literal {
53+
// CHECK-NEXT: var_char {
54+
// CHECK-NEXT: value: "hello"
55+
// CHECK-NEXT: }
56+
// CHECK-NEXT: }
57+
// CHECK-NEXT: }
58+
59+
substrait.plan version 0 : 42 : 1 {
60+
relation {
61+
%0 = named_table @t1 as ["a"] : tuple<si1>
62+
%1 = project %0 : tuple<si1> -> tuple<si1, !substrait.var_char<6>> {
63+
^bb0(%arg0: tuple<si1>):
64+
%2 = literal #substrait.var_char<"hello", 6>
65+
yield %2 : !substrait.var_char<6>
66+
}
67+
yield %1 : tuple<si1, !substrait.var_char<6>>
68+
}
69+
}
70+
71+
// -----
72+
4273
// CHECK-LABEL: relations {
4374
// CHECK-NEXT: rel {
4475
// CHECK-NEXT: project {

test/Target/SubstraitPB/Export/types.mlir

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,32 @@ substrait.plan version 0 : 42 : 1 {
3636

3737
// -----
3838

39+
// CHECK-LABEL: relations {
40+
// CHECK-NEXT: rel {
41+
// CHECK-NEXT: read {
42+
// CHECK: base_schema {
43+
// CHECK-NEXT: names: "a"
44+
// CHECK-NEXT: struct {
45+
// CHECK-NEXT: types {
46+
// CHECK-NEXT: varchar {
47+
// CHECK-NEXT: length: 6
48+
// CHECK-NEXT: nullability: NULLABILITY_REQUIRED
49+
// CHECK-NEXT: }
50+
// CHECK-NEXT: }
51+
// CHECK-NEXT: nullability: NULLABILITY_REQUIRED
52+
// CHECK-NEXT: }
53+
// CHECK-NEXT: }
54+
// CHECK-NEXT: named_table {
55+
56+
substrait.plan version 0 : 42 : 1 {
57+
relation {
58+
%0 = named_table @t1 as ["a"] : tuple<!substrait.var_char<6>>
59+
yield %0 : tuple<!substrait.var_char<6>>
60+
}
61+
}
62+
63+
// -----
64+
3965
// CHECK-LABEL: relations {
4066
// CHECK-NEXT: rel {
4167
// CHECK-NEXT: read {

test/Target/SubstraitPB/Import/literal.textpb

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,62 @@ version {
6868

6969
# -----
7070

71+
# CHECK: substrait.plan version 0 : 42 : 1 {
72+
# CHECK-NEXT: relation
73+
# CHECK: %[[V0:.*]] = named_table
74+
# CHECK-NEXT: %[[V1:.*]] = project %[[V0]] : tuple<si1> -> tuple<si1, !substrait.var_char<5>> {
75+
# CHECK-NEXT: ^[[BB0:.*]](%[[ARG0:.*]]: tuple<si1>):
76+
# CHECK-NEXT: %[[V2:.*]] = literal #substrait.var_char<"hello", 5>
77+
# CHECK-NEXT: yield %[[V2]] : !substrait.var_char<5>
78+
# CHECK-NEXT: }
79+
# CHECK-NEXT: yield %[[V1]] : tuple<si1, !substrait.var_char<5>>
80+
81+
relations {
82+
rel {
83+
project {
84+
common {
85+
direct {
86+
}
87+
}
88+
input {
89+
read {
90+
common {
91+
direct {
92+
}
93+
}
94+
base_schema {
95+
names: "a"
96+
struct {
97+
types {
98+
bool {
99+
nullability: NULLABILITY_REQUIRED
100+
}
101+
}
102+
nullability: NULLABILITY_REQUIRED
103+
}
104+
}
105+
named_table {
106+
names: "t1"
107+
}
108+
}
109+
}
110+
expressions {
111+
literal {
112+
var_char {
113+
value: "hello"
114+
}
115+
}
116+
}
117+
}
118+
}
119+
}
120+
version {
121+
minor_number: 42
122+
patch_number: 1
123+
}
124+
125+
# -----
126+
71127
# CHECK: substrait.plan version 0 : 42 : 1 {
72128
# CHECK-NEXT: relation
73129
# CHECK: %[[V0:.*]] = named_table

0 commit comments

Comments
 (0)