Skip to content

Commit 613af3f

Browse files
authored
chore: duckdb e2e roundtrip tests (#5342)
Signed-off-by: Alexander Droste <[email protected]>
1 parent bd6e37a commit 613af3f

File tree

3 files changed

+200
-3
lines changed

3 files changed

+200
-3
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-duckdb/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ vortex-vector = { workspace = true }
4141
[dev-dependencies]
4242
jiff = { workspace = true }
4343
rstest = { workspace = true }
44+
vortex-runend = { workspace = true }
45+
vortex-sequence = { workspace = true }
4446

4547
[lints]
4648
workspace = true

vortex-duckdb/src/e2e_test/vortex_scan_test.rs

Lines changed: 196 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,17 @@ use num_traits::AsPrimitive;
1515
use tempfile::NamedTempFile;
1616
use vortex::IntoArray;
1717
use vortex::arrays::{
18-
BoolArray, ConstantArray, FixedSizeListArray, ListArray, PrimitiveArray, StructArray,
19-
VarBinArray, VarBinViewArray,
18+
BoolArray, ConstantArray, DictArray, FixedSizeListArray, ListArray, PrimitiveArray,
19+
StructArray, VarBinArray, VarBinViewArray,
2020
};
2121
use vortex::buffer::buffer;
22+
use vortex::dtype::{Nullability, PType};
2223
use vortex::file::WriteOptionsSessionExt;
2324
use vortex::io::runtime::BlockingRuntime;
24-
use vortex::scalar::Scalar;
25+
use vortex::scalar::{PValue, Scalar};
2526
use vortex::validity::Validity;
27+
use vortex_runend::RunEndArray;
28+
use vortex_sequence::SequenceArray;
2629

2730
use crate::cpp::{duckdb_string_t, duckdb_timestamp};
2831
use crate::duckdb::{Connection, Database};
@@ -703,3 +706,193 @@ fn test_vortex_scan_ultra_deep_nesting() {
703706
}
704707
assert_eq!(row_count, 1, "Should have retrieved 1 row");
705708
}
709+
710+
async fn write_vortex_file_with_encodings() -> NamedTempFile {
711+
let temp_file_path = create_temp_file();
712+
713+
// 0. Primitive
714+
let primitive_i32 = buffer![1i32, 2, 3, 4, 5];
715+
let primitive_f64 = buffer![1.1f64, 2.2, 3.3, 4.4, 5.5];
716+
717+
// 1. Constant
718+
let constant_str = ConstantArray::new(Scalar::from("constant_value"), 5);
719+
720+
// 2. Boolean
721+
let bool_array = BoolArray::from_bit_buffer(
722+
vec![true, false, true, false, true].into(),
723+
Validity::NonNullable,
724+
);
725+
726+
// 3. Dictionary
727+
let keys = buffer![0u32, 1, 0, 2, 1];
728+
let values = VarBinArray::from(vec!["apple", "banana", "cherry"]);
729+
let dict_array = DictArray::try_new(keys.into_array(), values.into_array()).unwrap();
730+
731+
// 4. Run-End
732+
let run_ends = buffer![3u32, 5];
733+
let run_values = buffer![100i32, 200];
734+
let rle_array = RunEndArray::try_new(run_ends.into_array(), run_values.into_array()).unwrap();
735+
736+
// 5. Sequence array
737+
let sequence_array = SequenceArray::new(
738+
PValue::I64(0),
739+
PValue::I64(10),
740+
PType::I64,
741+
Nullability::NonNullable,
742+
5,
743+
)
744+
.unwrap()
745+
.into_array();
746+
747+
// 6. VarBin
748+
let varbin_array = VarBinArray::from(vec!["hello", "world", "vortex", "test", "data"]);
749+
750+
// 7. List
751+
let list_values = PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
752+
let list_offsets = buffer![0u32, 2, 5, 6, 10, 10]; // [1,2], [3,4,5], [6], [7,8,9,10], []
753+
let list_array = ListArray::try_new(
754+
list_values.into_array(),
755+
list_offsets.into_array(),
756+
Validity::NonNullable,
757+
)
758+
.unwrap();
759+
760+
// 8. Fixed-size list
761+
let fixed_list_values = PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
762+
let fixed_list_array = FixedSizeListArray::try_new(
763+
fixed_list_values.into_array(),
764+
2, // 2 elements per list
765+
Validity::NonNullable,
766+
5, // 5 lists
767+
)
768+
.unwrap();
769+
770+
// Struct array containing the different encodings.
771+
let struct_array = StructArray::try_from_iter([
772+
("primitive_i32", primitive_i32.into_array()),
773+
("primitive_f64", primitive_f64.into_array()),
774+
("constant_str", constant_str.into_array()),
775+
("bool_col", bool_array.into_array()),
776+
("dict_col", dict_array.into_array()),
777+
("rle_col", rle_array.into_array()),
778+
("sequence_col", sequence_array),
779+
("varbin_col", varbin_array.into_array()),
780+
("list_col", list_array.into_array()),
781+
("fixed_list_col", fixed_list_array.into_array()),
782+
])
783+
.unwrap();
784+
785+
// Write to file
786+
let mut file = async_fs::File::create(&temp_file_path).await.unwrap();
787+
SESSION
788+
.write_options()
789+
.write(&mut file, struct_array.to_array_stream())
790+
.await
791+
.unwrap();
792+
793+
temp_file_path
794+
}
795+
796+
/// Roundtrips the file produced by `write_vortex_file_with_encodings` through DuckDB's
/// `vortex_scan` table function and checks every column of the first result chunk.
///
/// Columns are addressed by position, in the order they were written: primitive i32,
/// primitive f64, constant string, boolean, dictionary, run-end, sequence, varbin, list and
/// fixed-size list.
// The lint is silenced because the test deliberately checks all ten columns in one body.
#[allow(clippy::cognitive_complexity)]
#[test]
fn test_vortex_encodings_roundtrip() {
    let file = RUNTIME.block_on(write_vortex_file_with_encodings());
    let conn = database_connection();

    // Test reading back each column type
    let result = conn
        .query(&format!(
            "SELECT * FROM vortex_scan('{}')",
            file.path().to_string_lossy()
        ))
        .unwrap();

    let chunk = result.into_iter().next().unwrap();
    assert_eq!(chunk.len(), 5); // 5 rows
    assert_eq!(chunk.column_count(), 10); // 10 columns

    // Verify primitive i32 (column 0)
    let primitive_i32_vec = chunk.get_vector(0);
    let primitive_i32_slice = primitive_i32_vec.as_slice_with_len::<i32>(chunk.len().as_());
    assert_eq!(primitive_i32_slice, [1, 2, 3, 4, 5]);

    // Verify primitive f64 (column 1). All five rows are compared, not just a prefix.
    let primitive_f64_vec = chunk.get_vector(1);
    let primitive_f64_slice = primitive_f64_vec.as_slice_with_len::<f64>(chunk.len().as_());
    let expected_f64 = [1.1f64, 2.2, 3.3, 4.4, 5.5];
    // `zip` stops at the shorter side, so pin the length before iterating.
    assert_eq!(primitive_f64_slice.len(), expected_f64.len());
    for (actual, expected) in primitive_f64_slice.iter().zip(expected_f64.iter()) {
        assert!(
            (*actual - *expected).abs() < f64::EPSILON,
            "expected {expected}, got {actual}"
        );
    }

    // Verify constant string (column 2)
    let mut constant_vec = chunk.get_vector(2);
    // SAFETY (assumed, confirm against `as_slice_mut`'s contract): column 2 was written as a
    // string column and the chunk holds `chunk.len()` rows, so viewing that many
    // `duckdb_string_t` entries should stay in bounds.
    let constant_slice =
        unsafe { constant_vec.as_slice_mut::<duckdb_string_t>(chunk.len().as_()) };
    assert_eq!(constant_slice.len(), 5);
    for value in constant_slice.iter_mut() {
        let string_val = String::from_duckdb_value(value);
        assert_eq!(string_val, "constant_value");
    }

    // Verify boolean (column 3)
    let bool_vec = chunk.get_vector(3);
    let bool_slice = bool_vec.as_slice_with_len::<bool>(chunk.len().as_());
    assert_eq!(bool_slice, [true, false, true, false, true]);

    // Verify dictionary (column 4)
    let mut dict_vec = chunk.get_vector(4);
    // SAFETY (assumed, confirm against `as_slice_mut`'s contract): column 4 holds strings and
    // the chunk holds `chunk.len()` rows.
    let dict_slice = unsafe { dict_vec.as_slice_mut::<duckdb_string_t>(chunk.len().as_()) };
    // Keys were [0, 1, 0, 2, 1] and values were ["apple", "banana", "cherry"]
    let expected_dict_values = ["apple", "banana", "apple", "cherry", "banana"];
    assert_eq!(dict_slice.len(), expected_dict_values.len());
    for (value, expected) in dict_slice.iter_mut().zip(expected_dict_values.iter()) {
        let string_val = String::from_duckdb_value(value);
        assert_eq!(string_val, *expected);
    }

    // Verify RLE (column 5): run ends [3, 5] over values [100, 200]
    let rle_vec = chunk.get_vector(5);
    let rle_slice = rle_vec.as_slice_with_len::<i32>(chunk.len().as_());
    assert_eq!(rle_slice, [100, 100, 100, 200, 200]);

    // Verify sequence (column 6)
    let seq_vec = chunk.get_vector(6);
    let seq_slice = seq_vec.as_slice_with_len::<i64>(chunk.len().as_());
    assert_eq!(seq_slice, [0, 10, 20, 30, 40]);

    // Verify varbin (column 7)
    let mut varbin_vec = chunk.get_vector(7);
    // SAFETY (assumed, confirm against `as_slice_mut`'s contract): column 7 holds strings and
    // the chunk holds `chunk.len()` rows.
    let varbin_slice = unsafe { varbin_vec.as_slice_mut::<duckdb_string_t>(chunk.len().as_()) };
    let expected_strings = ["hello", "world", "vortex", "test", "data"];
    assert_eq!(varbin_slice.len(), expected_strings.len());
    for (value, expected) in varbin_slice.iter_mut().zip(expected_strings.iter()) {
        let string_val = String::from_duckdb_value(value);
        assert_eq!(string_val, *expected);
    }

    // Verify list (column 8)
    // Expected lists: [1,2], [3,4,5], [6], [7,8,9,10], []
    let list_vec = chunk.get_vector(8);
    let list_entries = list_vec.as_slice_with_len::<cpp::duckdb_list_entry>(chunk.len().as_());

    // Verify list lengths
    assert_eq!(list_entries[0].length, 2); // [1,2]
    assert_eq!(list_entries[1].length, 3); // [3,4,5]
    assert_eq!(list_entries[2].length, 1); // [6]
    assert_eq!(list_entries[3].length, 4); // [7,8,9,10]
    assert_eq!(list_entries[4].length, 0); // []

    // Verify list offsets are sequential
    assert_eq!(list_entries[0].offset, 0);
    assert_eq!(list_entries[1].offset, 2);
    assert_eq!(list_entries[2].offset, 5);
    assert_eq!(list_entries[3].offset, 6);
    assert_eq!(list_entries[4].offset, 10);

    // Get child vector and verify actual values
    let list_child = list_vec.list_vector_get_child();
    let child_values = list_child.as_slice_with_len::<i32>(10); // 10 total child elements
    assert_eq!(child_values, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);

    // Verify fixed-size list column (column 9)
    // Expected fixed-size lists: [1,2], [3,4], [5,6], [7,8], [9,10]
    let fixed_list_vec = chunk.get_vector(9);
    let fixed_child = fixed_list_vec.array_vector_get_child();
    let fixed_child_values = fixed_child.as_slice_with_len::<i32>(10); // 10 total child elements
    assert_eq!(fixed_child_values, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
}

0 commit comments

Comments
 (0)