Skip to content

Commit 90b5691

Browse files
committed
vortex-datafusion: Add test to reproduce schema field order mismatch
Signed-off-by: Frederic Branczyk <[email protected]>
1 parent fe4c81b commit 90b5691

File tree

1 file changed

+77
-0
lines changed

1 file changed

+77
-0
lines changed

vortex-datafusion/src/persistent/opener.rs

Lines changed: 77 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -870,4 +870,81 @@ mod tests {
870870

871871
Ok(())
872872
}
873+
874+
#[tokio::test]
875+
// Minimal reproduction of the schema projection bug (file vs. table field order mismatch).
876+
// Before the fix, this would fail with a cast error when the file schema
877+
// and table schema have different field orders and we project a subset of columns.
878+
async fn test_projection_bug_minimal_repro() -> anyhow::Result<()> {
879+
let object_store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
880+
let file_path = "/path/file.vortex";
881+
882+
// The file is written with columns in the order a, b, c, using plain Int32 / Utf8 types.
883+
let batch = record_batch!(
884+
("a", Int32, vec![Some(1)]),
885+
("b", Utf8, vec![Some("test")]),
886+
("c", Int32, vec![Some(2)])
887+
)
888+
.unwrap();
889+
let data_size = write_arrow_to_vortex(object_store.clone(), file_path, batch).await?;
890+
891+
// The table schema lists the same columns in a DIFFERENT order: c, a, b,
892+
// and declares b as Dictionary(UInt32, Utf8), so reading b requires a cast from Utf8.
893+
let table_schema = Arc::new(Schema::new(vec![
894+
Field::new("c", DataType::Int32, true),
895+
Field::new("a", DataType::Int32, true),
896+
Field::new(
897+
"b",
898+
DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)),
899+
true,
900+
),
901+
]));
902+
903+
// Project indices [0, 2] of the table schema, which should give us: c, b (in that order).
904+
// Before the fix, the schema adapter would get confused about which fields
905+
// to select from the file, causing incorrect type mappings.
906+
let projection = vec![0, 2];
907+
908+
let opener = VortexOpener {
909+
session: SESSION.clone(),
910+
object_store: object_store.clone(),
911+
projection: Some(projection.into()),
912+
filter: None,
913+
file_pruning_predicate: None,
914+
expr_adapter_factory: Some(Arc::new(DefaultPhysicalExprAdapterFactory) as _),
915+
schema_adapter_factory: Arc::new(DefaultSchemaAdapterFactory),
916+
partition_fields: vec![],
917+
file_cache: VortexFileCache::new(1, 1, SESSION.clone()),
918+
logical_schema: table_schema.clone(),
919+
batch_size: 100,
920+
limit: None,
921+
metrics: Default::default(),
922+
layout_readers: Default::default(),
923+
has_output_ordering: false,
924+
};
925+
926+
// Open the file and collect every batch; this should succeed and return the projected, cast data.
927+
let data = opener
928+
.open(
929+
make_meta(file_path, data_size),
930+
PartitionedFile::new(file_path.to_string(), data_size),
931+
)?
932+
.await?
933+
.try_collect::<Vec<_>>()
934+
.await?;
935+
936+
// Snapshot the output with type info enabled: checks column order, column types and values.
937+
use datafusion::arrow::util::pretty::pretty_format_batches_with_options;
938+
let format_opts = FormatOptions::new().with_types_info(true);
939+
assert_snapshot!(pretty_format_batches_with_options(&data, &format_opts)?.to_string(), @r"
940+
+-------+--------------------------+
941+
| c | b |
942+
| Int32 | Dictionary(UInt32, Utf8) |
943+
+-------+--------------------------+
944+
| 2 | test |
945+
+-------+--------------------------+
946+
");
947+
948+
Ok(())
949+
}
873950
}

0 commit comments

Comments
 (0)