Skip to content

Commit 8eaf5de

Browse files
authored
feat: add support for reading lists (#292)
1 parent d1ea4cb commit 8eaf5de

File tree

5 files changed

+208
-10
lines changed

5 files changed

+208
-10
lines changed

src/row.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use super::{Error, Result, Statement};
44
use crate::types::{self, FromSql, FromSqlError, ValueRef};
55

66
use arrow::{
7-
array::{self, Array, StructArray},
7+
array::{self, Array, ArrayRef, ListArray, StructArray},
88
datatypes::*,
99
};
1010
use fallible_iterator::FallibleIterator;
@@ -339,6 +339,10 @@ impl<'stmt> Row<'stmt> {
339339

340340
fn value_ref(&self, row: usize, col: usize) -> ValueRef<'_> {
341341
let column = self.arr.as_ref().as_ref().unwrap().column(col);
342+
Self::value_ref_internal(row, col, column)
343+
}
344+
345+
pub(crate) fn value_ref_internal(row: usize, col: usize, column: &ArrayRef) -> ValueRef {
342346
if column.is_null(row) {
343347
return ValueRef::Null;
344348
}
@@ -592,7 +596,12 @@ impl<'stmt> Row<'stmt> {
592596
// DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => {
593597
// make_string_time!(array::Time64NanosecondArray, column, row)
594598
// }
595-
_ => unreachable!("invalid value: {}, {}", col, self.stmt.column_type(col)),
599+
DataType::List(_data) => {
600+
let arr = column.as_any().downcast_ref::<ListArray>().unwrap();
601+
602+
ValueRef::List(arr, row)
603+
}
604+
_ => unreachable!("invalid value: {} {}", col, column.data_type()),
596605
}
597606
}
598607

src/test_all_types.rs

Lines changed: 128 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use pretty_assertions::assert_eq;
22
use rust_decimal::Decimal;
33

44
use crate::{
5-
types::{TimeUnit, ValueRef},
5+
types::{TimeUnit, Type, Value, ValueRef},
66
Connection,
77
};
88

@@ -21,13 +21,6 @@ fn test_all_types() -> crate::Result<()> {
2121
"small_enum",
2222
"medium_enum",
2323
"large_enum",
24-
"int_array",
25-
"double_array",
26-
"date_array",
27-
"timestamp_array",
28-
"timestamptz_array",
29-
"varchar_array",
30-
"nested_int_array",
3124
"struct",
3225
"struct_of_arrays",
3326
"array_of_structs",
@@ -57,6 +50,9 @@ fn test_all_types() -> crate::Result<()> {
5750
idx += 1;
5851
for column in row.stmt.column_names() {
5952
let value = row.get_ref_unwrap(row.stmt.column_index(&column)?);
53+
if idx != 2 {
54+
assert_ne!(value.data_type(), Type::Null);
55+
}
6056
test_single(&mut idx, column, value);
6157
}
6258
}
@@ -213,6 +209,122 @@ fn test_single(idx: &mut i32, column: String, value: ValueRef) {
213209
1 => assert_eq!(value, ValueRef::Blob(&[0, 0, 0, 97])),
214210
_ => assert_eq!(value, ValueRef::Null),
215211
},
212+
"int_array" => match idx {
213+
0 => assert_eq!(value.to_owned(), Value::List(vec![])),
214+
1 => assert_eq!(
215+
value.to_owned(),
216+
Value::List(vec![
217+
Value::Int(42),
218+
Value::Int(999),
219+
Value::Null,
220+
Value::Null,
221+
Value::Int(-42),
222+
])
223+
),
224+
_ => assert_eq!(value, ValueRef::Null),
225+
},
226+
"double_array" => match idx {
227+
0 => assert_eq!(value.to_owned(), Value::List(vec![])),
228+
1 => {
229+
let value = value.to_owned();
230+
231+
if let Value::List(values) = value {
232+
assert_eq!(values.len(), 6);
233+
assert_eq!(values[0], Value::Double(42.0));
234+
assert!(unwrap(&values[1]).is_nan());
235+
let val = unwrap(&values[2]);
236+
assert!(val.is_infinite() && val.is_sign_positive());
237+
let val = unwrap(&values[3]);
238+
assert!(val.is_infinite() && val.is_sign_negative());
239+
assert_eq!(values[4], Value::Null);
240+
assert_eq!(values[5], Value::Double(-42.0));
241+
}
242+
}
243+
_ => assert_eq!(value, ValueRef::Null),
244+
},
245+
"date_array" => match idx {
246+
0 => assert_eq!(value.to_owned(), Value::List(vec![])),
247+
1 => assert_eq!(
248+
value.to_owned(),
249+
Value::List(vec![
250+
Value::Date32(0),
251+
Value::Date32(2147483647),
252+
Value::Date32(-2147483647),
253+
Value::Null,
254+
Value::Date32(19124),
255+
])
256+
),
257+
_ => assert_eq!(value, ValueRef::Null),
258+
},
259+
"timestamp_array" => match idx {
260+
0 => assert_eq!(value.to_owned(), Value::List(vec![])),
261+
1 => assert_eq!(
262+
value.to_owned(),
263+
Value::List(vec![
264+
Value::Timestamp(TimeUnit::Microsecond, 0,),
265+
Value::Timestamp(TimeUnit::Microsecond, 9223372036854775807,),
266+
Value::Timestamp(TimeUnit::Microsecond, -9223372036854775807,),
267+
Value::Null,
268+
Value::Timestamp(TimeUnit::Microsecond, 1652372625000000,),
269+
],)
270+
),
271+
_ => assert_eq!(value, ValueRef::Null),
272+
},
273+
"timestamptz_array" => match idx {
274+
0 => assert_eq!(value.to_owned(), Value::List(vec![])),
275+
1 => assert_eq!(
276+
value.to_owned(),
277+
Value::List(vec![
278+
Value::Timestamp(TimeUnit::Microsecond, 0,),
279+
Value::Timestamp(TimeUnit::Microsecond, 9223372036854775807,),
280+
Value::Timestamp(TimeUnit::Microsecond, -9223372036854775807,),
281+
Value::Null,
282+
Value::Timestamp(TimeUnit::Microsecond, 1652397825000000,),
283+
])
284+
),
285+
_ => assert_eq!(value, ValueRef::Null),
286+
},
287+
"varchar_array" => match idx {
288+
0 => assert_eq!(value.to_owned(), Value::List(vec![])),
289+
1 => assert_eq!(
290+
value.to_owned(),
291+
Value::List(vec![
292+
Value::Text("🦆🦆🦆🦆🦆🦆".to_string()),
293+
Value::Text("goose".to_string()),
294+
Value::Null,
295+
Value::Text("".to_string()),
296+
])
297+
),
298+
_ => assert_eq!(value, ValueRef::Null),
299+
},
300+
"nested_int_array" => match idx {
301+
0 => assert_eq!(value.to_owned(), Value::List(vec![])),
302+
1 => {
303+
assert_eq!(
304+
value.to_owned(),
305+
Value::List(vec![
306+
Value::List(vec![],),
307+
Value::List(vec![
308+
Value::Int(42,),
309+
Value::Int(999,),
310+
Value::Null,
311+
Value::Null,
312+
Value::Int(-42,),
313+
],),
314+
Value::Null,
315+
Value::List(vec![],),
316+
Value::List(vec![
317+
Value::Int(42,),
318+
Value::Int(999,),
319+
Value::Null,
320+
Value::Null,
321+
Value::Int(-42,),
322+
],),
323+
],)
324+
)
325+
}
326+
_ => assert_eq!(value, ValueRef::Null),
327+
},
216328
"bit" => match idx {
217329
0 => assert_eq!(value, ValueRef::Blob(&[1, 145, 46, 42, 215]),),
218330
1 => assert_eq!(value, ValueRef::Blob(&[3, 245])),
@@ -240,3 +352,11 @@ fn test_single(idx: &mut i32, column: String, value: ValueRef) {
240352
_ => todo!("{column:?}"),
241353
}
242354
}
355+
356+
fn unwrap(value: &Value) -> f64 {
357+
if let Value::Double(val) = value {
358+
*val
359+
} else {
360+
panic!();
361+
}
362+
}

src/types/mod.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ pub use self::{
7474
value_ref::{TimeUnit, ValueRef},
7575
};
7676

77+
use arrow::datatypes::DataType;
7778
use std::fmt;
7879

7980
#[cfg(feature = "chrono")]
@@ -146,10 +147,54 @@ pub enum Type {
146147
Time64,
147148
/// INTERVAL
148149
Interval,
150+
/// LIST
151+
List(Box<Type>),
149152
/// Any
150153
Any,
151154
}
152155

156+
/// Maps an Arrow [`DataType`] to this crate's [`Type`].
///
/// Only the Arrow types with an explicit arm below are supported; the
/// commented-out arms list Arrow types that have no mapping yet.
///
/// # Panics
///
/// Panics via `unimplemented!` (printing the Arrow type) for every
/// `DataType` that is not matched by one of the active arms.
impl From<&DataType> for Type {
157+
fn from(value: &DataType) -> Self {
158+
match value {
159+
DataType::Null => Self::Null,
160+
DataType::Boolean => Self::Boolean,
161+
DataType::Int8 => Self::TinyInt,
162+
DataType::Int16 => Self::SmallInt,
163+
DataType::Int32 => Self::Int,
164+
DataType::Int64 => Self::BigInt,
165+
DataType::UInt8 => Self::UTinyInt,
166+
DataType::UInt16 => Self::USmallInt,
167+
DataType::UInt32 => Self::UInt,
168+
DataType::UInt64 => Self::UBigInt,
169+
// DataType::Float16 => Self::Float16,
170+
// DataType::Float32 => Self::Float32,
171+
// NOTE(review): Arrow `Float64` is a 64-bit float and the crate has a
// `Value::Double` variant; confirm that `Type::Float` (rather than a
// double-precision variant) is the intended target here.
DataType::Float64 => Self::Float,
172+
// The time unit and timezone carried by the Arrow type are discarded.
DataType::Timestamp(_, _) => Self::Timestamp,
173+
DataType::Date32 => Self::Date32,
174+
// DataType::Date64 => Self::Date64,
175+
// DataType::Time32(_) => Self::Time32,
176+
// The time unit carried by the Arrow type is discarded.
DataType::Time64(_) => Self::Time64,
177+
// DataType::Duration(_) => Self::Duration,
178+
// DataType::Interval(_) => Self::Interval,
179+
DataType::Binary => Self::Blob,
180+
// DataType::FixedSizeBinary(_) => Self::FixedSizeBinary,
181+
// DataType::LargeBinary => Self::LargeBinary,
182+
DataType::Utf8 => Self::Text,
183+
// DataType::LargeUtf8 => Self::LargeUtf8,
184+
// Recurse on the list's element field so nested lists map to nested
// `Type::List` values; an unsupported element type panics like any other.
DataType::List(inner) => Self::List(Box::new(Type::from(inner.data_type()))),
185+
// DataType::FixedSizeList(field, size) => Self::Array,
186+
// DataType::LargeList(_) => Self::LargeList,
187+
// DataType::Struct(inner) => Self::Struct,
188+
// DataType::Union(_, _) => Self::Union,
189+
// DataType::Dictionary(_, _) => Self::Enum,
190+
// Both decimal widths collapse to the single `Decimal` type; the
// parameters of the Arrow type are ignored.
DataType::Decimal128(..) => Self::Decimal,
191+
DataType::Decimal256(..) => Self::Decimal,
192+
// DataType::Map(field, ..) => Self::Map,
193+
// Anything not listed above is unsupported and aborts with the type name.
res => unimplemented!("{}", res),
194+
}
195+
}
196+
}
197+
153198
impl fmt::Display for Type {
154199
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155200
match *self {
@@ -173,6 +218,7 @@ impl fmt::Display for Type {
173218
Type::Date32 => f.pad("Date32"),
174219
Type::Time64 => f.pad("Time64"),
175220
Type::Interval => f.pad("Interval"),
221+
Type::List(..) => f.pad("List"),
176222
Type::Any => f.pad("Any"),
177223
}
178224
}

src/types/value.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ pub enum Value {
5555
/// nanos
5656
nanos: i64,
5757
},
58+
/// The value is a list
59+
List(Vec<Value>),
5860
}
5961

6062
impl From<Null> for Value {
@@ -222,6 +224,7 @@ impl Value {
222224
Value::Date32(_) => Type::Date32,
223225
Value::Time64(..) => Type::Time64,
224226
Value::Interval { .. } => Type::Interval,
227+
Value::List(_) => todo!(),
225228
}
226229
}
227230
}

src/types/value_ref.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
use super::{Type, Value};
22
use crate::types::{FromSqlError, FromSqlResult};
33

4+
use crate::Row;
45
use rust_decimal::prelude::*;
56

7+
use arrow::array::{Array, ListArray};
8+
69
/// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds.
710
/// Copy from arrow::datatypes::TimeUnit
811
#[derive(Copy, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
@@ -70,6 +73,8 @@ pub enum ValueRef<'a> {
7073
/// nanos
7174
nanos: i64,
7275
},
76+
/// The value is a list
77+
List(&'a ListArray, usize),
7378
}
7479

7580
impl ValueRef<'_> {
@@ -97,8 +102,14 @@ impl ValueRef<'_> {
97102
ValueRef::Date32(_) => Type::Date32,
98103
ValueRef::Time64(..) => Type::Time64,
99104
ValueRef::Interval { .. } => Type::Interval,
105+
ValueRef::List(arr, _) => arr.data_type().into(),
100106
}
101107
}
108+
109+
/// Returns an owned version of this ValueRef
110+
pub fn to_owned(&self) -> Value {
111+
(*self).into()
112+
}
102113
}
103114

104115
impl<'a> ValueRef<'a> {
@@ -151,6 +162,14 @@ impl From<ValueRef<'_>> for Value {
151162
ValueRef::Date32(d) => Value::Date32(d),
152163
ValueRef::Time64(t, d) => Value::Time64(t, d),
153164
ValueRef::Interval { months, days, nanos } => Value::Interval { months, days, nanos },
165+
ValueRef::List(items, idx) => {
166+
let offsets = items.offsets();
167+
let range = offsets[idx]..offsets[idx + 1];
168+
let map: Vec<Value> = range
169+
.map(|row| Row::value_ref_internal(row.try_into().unwrap(), idx, items.values()).to_owned())
170+
.collect();
171+
Value::List(map)
172+
}
154173
}
155174
}
156175
}
@@ -193,6 +212,7 @@ impl<'a> From<&'a Value> for ValueRef<'a> {
193212
Value::Date32(d) => ValueRef::Date32(d),
194213
Value::Time64(t, d) => ValueRef::Time64(t, d),
195214
Value::Interval { months, days, nanos } => ValueRef::Interval { months, days, nanos },
215+
Value::List(..) => unimplemented!(),
196216
}
197217
}
198218
}

0 commit comments

Comments
 (0)