Skip to content

Commit c1e897a

Browse files
authored
Feature: FixedSizeList infrastructure (#4385)
Adds infrastructure to support `FixedSizeList`, including: arbitrary implementations, Flatbuffers, Protobuf, and some tests. (The commit granularity is nice if you want to look at that.) Signed-off-by: Connor Tsui <[email protected]>
1 parent 4527649 commit c1e897a

File tree

14 files changed

+305
-36
lines changed

14 files changed

+305
-36
lines changed

vortex-array/src/arrays/arbitrary.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ fn random_array(u: &mut Unstructured, dtype: &DType, len: Option<usize>) -> Resu
125125
}
126126
DType::List(ldt, n) => random_list(u, ldt, *n, chunk_len),
127127
DType::FixedSizeList(..) => {
128-
unimplemented!("TODO(connor)[FixedSizeList]")
128+
unimplemented!("TODO(connor)[FixedSizeList]: Create canonical fixed-size list")
129129
}
130130
DType::Extension(..) => {
131131
todo!("Extension arrays are not implemented")

vortex-array/src/builders/list.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ use crate::compute::{add_scalar, cast, sub_scalar};
1717
use crate::{Array, ArrayRef, IntoArray, ToCanonical};
1818

1919
pub struct ListBuilder<O: NativePType> {
20+
/// The values of the list.
2021
value_builder: Box<dyn ArrayBuilder>,
22+
/// Represents the offsets into the values array.
2123
index_builder: PrimitiveBuilder<O>,
2224
nulls: LazyNullBufferBuilder,
2325
nullability: Nullability,

vortex-datafusion/src/convert/scalars.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ impl TryToDataFusion<ScalarValue> for Scalar {
7575
),
7676
DType::Struct(..) => todo!("struct scalar conversion"),
7777
DType::List(..) => todo!("list scalar conversion"),
78-
DType::FixedSizeList(..) => unimplemented!("TODO(connor)[FixedSizeList]"),
78+
DType::FixedSizeList(..) => todo!("fixed-size list scalar conversion"),
7979
DType::Extension(ext) => {
8080
let storage_scalar = self.as_extension().storage();
8181

vortex-dtype/src/arbitrary.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,15 @@ fn random_dtype(u: &mut Unstructured<'_>, depth: u8) -> Result<DType> {
3535
// container types
3636
6 => DType::Struct(random_struct_dtype(u, depth - 1)?, u.arbitrary()?),
3737
7 => DType::List(Arc::new(random_dtype(u, depth - 1)?), u.arbitrary()?),
38-
// 8 => unimplemented!("TODO(connor)[FixedSizeList]"),
38+
8 => {
39+
unimplemented!("TODO(connor)[FixedSizeList]");
40+
// DType::FixedSizeList(
41+
// Arc::new(random_dtype(u, depth - 1)?),
42+
// // We bound the list size by 3 rather than using an arbitrary size (following random struct fields).
43+
// u.choose_index(3)?.try_into().vortex_expect("impossible"),
44+
// u.arbitrary()?,
45+
// )
46+
}
3947
// Null,
4048
// Extension(ExtDType, Nullability),
4149
_ => unreachable!("Number out of range"),

vortex-dtype/src/serde/flatbuffers/mod.rs

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,24 @@ impl TryFrom<ViewedDType> for DType {
133133
fb_list.nullable().into(),
134134
))
135135
}
136+
fb::Type::FixedSizeList => {
137+
let fb_fixed_size_list = fb.type__as_fixed_size_list().ok_or_else(|| {
138+
vortex_err!("failed to parse fixed-size list from flatbuffer")
139+
})?;
140+
141+
let list_element = fb_fixed_size_list.element_type().ok_or_else(|| {
142+
vortex_err!("failed to parse list element type from flatbuffer")
143+
})?;
144+
let element_dtype = Self::try_from(ViewedDType::from_fb_loc(
145+
list_element._tab.loc(),
146+
vfdt.buffer().clone(),
147+
))?;
148+
Ok(Self::FixedSizeList(
149+
Arc::new(element_dtype),
150+
fb_fixed_size_list.size(),
151+
fb_fixed_size_list.nullable().into(),
152+
))
153+
}
136154
fb::Type::Struct_ => {
137155
let fb_struct = fb
138156
.type__as_struct_()
@@ -167,7 +185,7 @@ impl TryFrom<ViewedDType> for DType {
167185
}
168186
// This is here to fail to compile if another variant is included.
169187
#[allow(clippy::wildcard_in_or_patterns)]
170-
fb::Type(10) => Err(vortex_err!("Unknown DType variant")),
188+
fb::Type(11) => Err(vortex_err!("Unknown DType variant")),
171189
_ => Err(vortex_err!("Unknown DType variant")),
172190
}
173191
}
@@ -257,9 +275,17 @@ impl WriteFlatBuffer for DType {
257275
)
258276
.as_union_value()
259277
}
260-
Self::FixedSizeList(..) => {
261-
// TODO(connor)[FixedSizeList]: Add a `fb::FixedSizeList` type.
262-
unimplemented!("TODO(connor)[FixedSizeList]")
278+
Self::FixedSizeList(edt, size, n) => {
279+
let element_type = Some(edt.as_ref().write_flatbuffer(fbb));
280+
fb::FixedSizeList::create(
281+
fbb,
282+
&fb::FixedSizeListArgs {
283+
element_type,
284+
size: *size,
285+
nullable: (*n).into(),
286+
},
287+
)
288+
.as_union_value()
263289
}
264290
Self::Extension(ext) => {
265291
let id = Some(fbb.create_string(ext.id().as_ref()));
@@ -286,9 +312,7 @@ impl WriteFlatBuffer for DType {
286312
Self::Binary(_) => fb::Type::Binary,
287313
Self::Struct(..) => fb::Type::Struct_,
288314
Self::List(..) => fb::Type::List,
289-
Self::FixedSizeList(..) => {
290-
unimplemented!("TODO(connor)[FixedSizeList]")
291-
}
315+
Self::FixedSizeList(..) => fb::Type::FixedSizeList,
292316
Self::Extension { .. } => fb::Type::Extension,
293317
};
294318

@@ -382,7 +406,11 @@ mod test {
382406
Arc::new(DType::Primitive(PType::F32, Nullability::Nullable)),
383407
Nullability::NonNullable,
384408
));
385-
// TODO(connor)[FixedSizeList]
409+
roundtrip_dtype(DType::FixedSizeList(
410+
Arc::new(DType::Primitive(PType::F32, Nullability::Nullable)),
411+
2,
412+
Nullability::NonNullable,
413+
));
386414
roundtrip_dtype(DType::Struct(
387415
StructFields::new(
388416
["strings", "ints"].into(),

vortex-dtype/src/serde/proto.rs

Lines changed: 55 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,19 @@ impl TryFrom<&pb::DType> for DType {
5252
nullable,
5353
))
5454
}
55+
DtypeType::FixedSizeList(fsl) => {
56+
let nullable = fsl.nullable.into();
57+
Ok(Self::FixedSizeList(
58+
fsl.element_type
59+
.as_ref()
60+
.ok_or_else(|| vortex_err!(InvalidSerde: "Invalid fixed-size list element type"))?
61+
.as_ref()
62+
.try_into()
63+
.map(Arc::new)?,
64+
fsl.size,
65+
nullable,
66+
))
67+
}
5568
DtypeType::Extension(e) => Ok(Self::Extension(
5669
Arc::new(ExtDType::new(
5770
ExtID::from(e.id.as_str()),
@@ -72,36 +85,39 @@ impl From<&DType> for pb::DType {
7285
Self {
7386
dtype_type: Some(match value {
7487
DType::Null => DtypeType::Null(pb::Null {}),
75-
DType::Bool(n) => DtypeType::Bool(pb::Bool {
76-
nullable: (*n).into(),
88+
DType::Bool(null) => DtypeType::Bool(pb::Bool {
89+
nullable: (*null).into(),
7790
}),
78-
DType::Primitive(ptype, n) => DtypeType::Primitive(pb::Primitive {
91+
DType::Primitive(ptype, null) => DtypeType::Primitive(pb::Primitive {
7992
r#type: pb::PType::from(*ptype).into(),
80-
nullable: (*n).into(),
93+
nullable: (*null).into(),
8194
}),
82-
DType::Decimal(decimal, n) => DtypeType::Decimal(pb::Decimal {
95+
DType::Decimal(decimal, null) => DtypeType::Decimal(pb::Decimal {
8396
precision: decimal.precision() as u32,
8497
scale: decimal.scale() as i32,
85-
nullable: (*n).into(),
98+
nullable: (*null).into(),
8699
}),
87-
DType::Utf8(n) => DtypeType::Utf8(pb::Utf8 {
88-
nullable: (*n).into(),
100+
DType::Utf8(null) => DtypeType::Utf8(pb::Utf8 {
101+
nullable: (*null).into(),
89102
}),
90-
DType::Binary(n) => DtypeType::Binary(pb::Binary {
91-
nullable: (*n).into(),
103+
DType::Binary(null) => DtypeType::Binary(pb::Binary {
104+
nullable: (*null).into(),
92105
}),
93-
DType::Struct(s, n) => DtypeType::Struct(pb::Struct {
106+
DType::Struct(s, null) => DtypeType::Struct(pb::Struct {
94107
names: s.names().iter().map(|s| s.as_ref().to_string()).collect(),
95108
dtypes: s.fields().map(|d| Self::from(&d)).collect(),
96-
nullable: (*n).into(),
109+
nullable: (*null).into(),
97110
}),
98-
DType::List(l, n) => DtypeType::List(Box::new(pb::List {
99-
element_type: Some(Box::new(l.as_ref().into())),
100-
nullable: (*n).into(),
111+
DType::List(edt, null) => DtypeType::List(Box::new(pb::List {
112+
element_type: Some(Box::new(edt.as_ref().into())),
113+
nullable: (*null).into(),
101114
})),
102-
DType::FixedSizeList(..) => {
103-
// TODO(connor)[FixedSizeList]
104-
unimplemented!("TODO(connor)[FixedSizeList]")
115+
DType::FixedSizeList(edt, size, null) => {
116+
DtypeType::FixedSizeList(Box::new(pb::FixedSizeList {
117+
element_type: Some(Box::new(edt.as_ref().into())),
118+
size: *size,
119+
nullable: (*null).into(),
120+
}))
105121
}
106122
DType::Extension(e) => DtypeType::Extension(Box::new(pb::Extension {
107123
id: e.id().as_ref().into(),
@@ -259,6 +275,7 @@ mod tests {
259275
#[test]
260276
fn test_list_round_trip() {
261277
let list_types = vec![
278+
// List types
262279
DType::List(
263280
Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)),
264281
Nullability::Nullable,
@@ -274,6 +291,26 @@ mod tests {
274291
)),
275292
Nullability::NonNullable,
276293
),
294+
// FixedSizeList types
295+
DType::FixedSizeList(
296+
Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)),
297+
3,
298+
Nullability::Nullable,
299+
),
300+
DType::FixedSizeList(
301+
Arc::new(DType::Utf8(Nullability::Nullable)),
302+
5,
303+
Nullability::NonNullable,
304+
),
305+
DType::FixedSizeList(
306+
Arc::new(DType::FixedSizeList(
307+
Arc::new(DType::Primitive(PType::F64, Nullability::NonNullable)),
308+
2,
309+
Nullability::Nullable,
310+
)),
311+
4,
312+
Nullability::NonNullable,
313+
),
277314
];
278315

279316
for dtype in list_types {

vortex-flatbuffers/flatbuffers/vortex-dtype/dtype.fbs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ table List {
5151
nullable: bool;
5252
}
5353

54+
table FixedSizeList {
55+
element_type: DType;
56+
size: uint32;
57+
nullable: bool;
58+
}
59+
5460
table Extension {
5561
id: string;
5662
storage_dtype: DType;
@@ -67,6 +73,7 @@ union Type {
6773
Struct_ = 7,
6874
List = 8,
6975
Extension = 9,
76+
FixedSizeList = 10, // This is after `Extension` for backwards compatibility.
7077
}
7178

7279
table DType {

0 commit comments

Comments
 (0)