Skip to content

Commit 69af8eb

Browse files
committed
chore: duckdb e2e roundtrip tests
Signed-off-by: Alexander Droste <[email protected]>
1 parent 5405737 commit 69af8eb

File tree

3 files changed

+201
-3
lines changed

3 files changed

+201
-3
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-duckdb/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ vortex = { workspace = true, features = ["files", "tokio", "object_store"] }
3838
vortex-utils = { workspace = true, features = ["dashmap"] }
3939
vortex-vector = { workspace = true }
4040

41+
# Encoding dependencies for tests
42+
vortex-runend = { path = "../encodings/runend" }
43+
vortex-sequence = { path = "../encodings/sequence" }
44+
4145
[dev-dependencies]
4246
jiff = { workspace = true }
4347
rstest = { workspace = true }

vortex-duckdb/src/e2e_test/vortex_scan_test.rs

Lines changed: 195 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,17 @@ use num_traits::AsPrimitive;
1515
use tempfile::NamedTempFile;
1616
use vortex::IntoArray;
1717
use vortex::arrays::{
18-
BoolArray, ConstantArray, FixedSizeListArray, ListArray, PrimitiveArray, StructArray,
19-
VarBinArray, VarBinViewArray,
18+
BoolArray, ConstantArray, DictArray, FixedSizeListArray, ListArray, PrimitiveArray,
19+
StructArray, VarBinArray, VarBinViewArray,
2020
};
2121
use vortex::buffer::buffer;
22+
use vortex::dtype::{Nullability, PType};
2223
use vortex::file::WriteOptionsSessionExt;
2324
use vortex::io::runtime::BlockingRuntime;
24-
use vortex::scalar::Scalar;
25+
use vortex::scalar::{PValue, Scalar};
2526
use vortex::validity::Validity;
27+
use vortex_runend::RunEndArray;
28+
use vortex_sequence::SequenceArray;
2629

2730
use crate::cpp::{duckdb_string_t, duckdb_timestamp};
2831
use crate::duckdb::{Connection, Database};
@@ -703,3 +706,192 @@ fn test_vortex_scan_ultra_deep_nesting() {
703706
}
704707
assert_eq!(row_count, 1, "Should have retrieved 1 row");
705708
}
709+
710+
async fn write_vortex_file_with_encodings() -> NamedTempFile {
711+
let temp_file_path = create_temp_file();
712+
713+
// 1. Primitive
714+
let primitive_i32 = buffer![1i32, 2, 3, 4, 5];
715+
let primitive_f64 = buffer![1.1f64, 2.2, 3.3, 4.4, 5.5];
716+
717+
// 2. Constant
718+
let constant_str = ConstantArray::new(Scalar::from("constant_value"), 5);
719+
720+
// 3. Boolean
721+
let bool_array = BoolArray::from_bit_buffer(
722+
vec![true, false, true, false, true].into(),
723+
Validity::NonNullable,
724+
);
725+
726+
// 4. Dictionary
727+
let keys = buffer![0u32, 1, 0, 2, 1];
728+
let values = VarBinArray::from(vec!["apple", "banana", "cherry"]);
729+
let dict_array = DictArray::try_new(keys.into_array(), values.into_array()).unwrap();
730+
731+
// 5. Run-End
732+
let run_ends = buffer![3u32, 5];
733+
let run_values = buffer![100i32, 200];
734+
let rle_array = RunEndArray::try_new(run_ends.into_array(), run_values.into_array()).unwrap();
735+
736+
// 6. Sequence array
737+
let sequence_array = SequenceArray::new(
738+
PValue::I64(0),
739+
PValue::I64(10),
740+
PType::I64,
741+
Nullability::NonNullable,
742+
5,
743+
)
744+
.unwrap()
745+
.into_array();
746+
747+
// 7. VarBin
748+
let varbin_array = VarBinArray::from(vec!["hello", "world", "vortex", "test", "data"]);
749+
750+
// 8. List
751+
let list_values = PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
752+
let list_offsets = buffer![0u32, 2, 5, 6, 10, 10]; // [1,2], [3,4,5], [6], [7,8,9,10], []
753+
let list_array = ListArray::try_new(
754+
list_values.into_array(),
755+
list_offsets.into_array(),
756+
Validity::NonNullable,
757+
)
758+
.unwrap();
759+
760+
// 9. Fixed-size list
761+
let fixed_list_values = PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
762+
let fixed_list_array = FixedSizeListArray::try_new(
763+
fixed_list_values.into_array(),
764+
2, // 2 elements per list
765+
Validity::NonNullable,
766+
5, // 5 lists
767+
)
768+
.unwrap();
769+
770+
// Struct array containing the different encodings.
771+
let struct_array = StructArray::try_from_iter([
772+
("primitive_i32", primitive_i32.into_array()),
773+
("primitive_f64", primitive_f64.into_array()),
774+
("constant_str", constant_str.into_array()),
775+
("bool_col", bool_array.into_array()),
776+
("dict_col", dict_array.into_array()),
777+
("rle_col", rle_array.into_array()),
778+
("sequence_col", sequence_array),
779+
("varbin_col", varbin_array.into_array()),
780+
("list_col", list_array.into_array()),
781+
("fixed_list_col", fixed_list_array.into_array()),
782+
])
783+
.unwrap();
784+
785+
// Write to file
786+
let mut file = async_fs::File::create(&temp_file_path).await.unwrap();
787+
SESSION
788+
.write_options()
789+
.write(&mut file, struct_array.to_array_stream())
790+
.await
791+
.unwrap();
792+
793+
temp_file_path
794+
}
795+
796+
/// Round-trips the file produced by `write_vortex_file_with_encodings` through DuckDB's
/// `vortex_scan` table function and checks the decoded contents of every column.
///
/// The first result chunk is expected to hold all 5 rows and all 10 columns, in the
/// order the struct fields were written.
#[test]
fn test_vortex_encodings_roundtrip() {
    let file = RUNTIME.block_on(write_vortex_file_with_encodings());
    let conn = database_connection();

    let result = conn
        .query(&format!(
            "SELECT * FROM vortex_scan('{}')",
            file.path().to_string_lossy()
        ))
        .unwrap();

    let chunk = result.into_iter().next().unwrap();
    assert_eq!(chunk.len(), 5); // 5 rows
    assert_eq!(chunk.column_count(), 10); // 10 columns

    // Column 0: primitive i32.
    let primitive_i32_vec = chunk.get_vector(0);
    let primitive_i32_slice = primitive_i32_vec.as_slice_with_len::<i32>(chunk.len().as_());
    assert_eq!(primitive_i32_slice, [1, 2, 3, 4, 5]);

    // Column 1: primitive f64. Every row is checked, not just a prefix, so a corrupted
    // tail value cannot slip through.
    let primitive_f64_vec = chunk.get_vector(1);
    let primitive_f64_slice = primitive_f64_vec.as_slice_with_len::<f64>(chunk.len().as_());
    let expected_f64 = [1.1f64, 2.2, 3.3, 4.4, 5.5];
    assert_eq!(primitive_f64_slice.len(), expected_f64.len());
    for (idx, expected) in expected_f64.into_iter().enumerate() {
        let actual = primitive_f64_slice[idx];
        assert!(
            (actual - expected).abs() < f64::EPSILON,
            "primitive_f64[{idx}]: expected {expected}, got {actual}"
        );
    }

    // Column 2: constant string.
    let mut constant_vec = chunk.get_vector(2);
    // SAFETY: (assumed contract, confirm against `as_slice_mut`) column 2 was written as
    // a string column and the requested length is the chunk's row count.
    let constant_slice = unsafe { constant_vec.as_slice_mut::<duckdb_string_t>(chunk.len().as_()) };
    for raw in constant_slice.iter_mut() {
        let string_val = String::from_duckdb_value(raw);
        assert_eq!(string_val, "constant_value");
    }

    // Column 3: boolean.
    let bool_vec = chunk.get_vector(3);
    let bool_slice = bool_vec.as_slice_with_len::<bool>(chunk.len().as_());
    assert_eq!(bool_slice, [true, false, true, false, true]);

    // Column 4: dictionary.
    // Keys were [0, 1, 0, 2, 1] and values were ["apple", "banana", "cherry"].
    let mut dict_vec = chunk.get_vector(4);
    // SAFETY: (assumed contract, confirm against `as_slice_mut`) column 4 decodes to
    // strings and the requested length is the chunk's row count.
    let dict_slice = unsafe { dict_vec.as_slice_mut::<duckdb_string_t>(chunk.len().as_()) };
    let expected_dict_values = ["apple", "banana", "apple", "cherry", "banana"];
    assert_eq!(dict_slice.len(), expected_dict_values.len());
    for (raw, expected) in dict_slice.iter_mut().zip(expected_dict_values) {
        let string_val = String::from_duckdb_value(raw);
        assert_eq!(string_val, expected);
    }

    // Column 5: run-end encoded i32.
    let rle_vec = chunk.get_vector(5);
    let rle_slice = rle_vec.as_slice_with_len::<i32>(chunk.len().as_());
    assert_eq!(rle_slice, [100, 100, 100, 200, 200]);

    // Column 6: sequence.
    let seq_vec = chunk.get_vector(6);
    let seq_slice = seq_vec.as_slice_with_len::<i64>(chunk.len().as_());
    assert_eq!(seq_slice, [0, 10, 20, 30, 40]);

    // Column 7: varbin strings.
    let mut varbin_vec = chunk.get_vector(7);
    // SAFETY: (assumed contract, confirm against `as_slice_mut`) column 7 was written as
    // a string column and the requested length is the chunk's row count.
    let varbin_slice = unsafe { varbin_vec.as_slice_mut::<duckdb_string_t>(chunk.len().as_()) };
    let expected_strings = ["hello", "world", "vortex", "test", "data"];
    assert_eq!(varbin_slice.len(), expected_strings.len());
    for (raw, expected) in varbin_slice.iter_mut().zip(expected_strings) {
        let string_val = String::from_duckdb_value(raw);
        assert_eq!(string_val, expected);
    }

    // Column 8: variable-length list.
    // Expected lists: [1,2], [3,4,5], [6], [7,8,9,10], []
    let list_vec = chunk.get_vector(8);
    let list_entries =
        list_vec.as_slice_with_len::<crate::cpp::duckdb_list_entry>(chunk.len().as_());

    // (offset, length) of each list within the shared child vector.
    let expected_entries = [(0, 2), (2, 3), (5, 1), (6, 4), (10, 0)];
    assert_eq!(list_entries.len(), expected_entries.len());
    for (row, (entry, (offset, length))) in list_entries.iter().zip(expected_entries).enumerate() {
        assert_eq!(entry.offset, offset, "list row {row}: unexpected offset");
        assert_eq!(entry.length, length, "list row {row}: unexpected length");
    }

    // The child vector holds the flattened elements of all lists.
    let list_child = list_vec.list_vector_get_child();
    let child_values = list_child.as_slice_with_len::<i32>(10); // 10 total child elements
    assert_eq!(child_values, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);

    // Column 9: fixed-size list.
    // Expected fixed-size lists: [1,2], [3,4], [5,6], [7,8], [9,10]
    let fixed_list_vec = chunk.get_vector(9);
    let fixed_child = fixed_list_vec.array_vector_get_child();
    let fixed_child_values = fixed_child.as_slice_with_len::<i32>(10); // 10 total child elements
    assert_eq!(fixed_child_values, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
}

0 commit comments

Comments
 (0)