Skip to content

Commit 21bda11

Browse files
committed
vortex-datafusion: Add test for different table schema order
Signed-off-by: Frederic Branczyk <[email protected]>
1 parent bec5098 commit 21bda11

File tree

1 file changed

+68
-0
lines changed

1 file changed

+68
-0
lines changed

vortex-datafusion/src/persistent/opener.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,74 @@ mod tests {
658658
Ok(())
659659
}
660660

661+
#[tokio::test]
662+
// This test verifies that files with different column order than the
663+
// table schema can be opened without errors. The fix ensures that the
664+
// schema mapper is only used for type casting, not for reordering,
665+
// since the vortex projection already handles reordering.
666+
async fn test_schema_different_column_order() -> anyhow::Result<()> {
667+
use datafusion::arrow::util::pretty::pretty_format_batches_with_options;
668+
669+
let object_store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
670+
let file_path = "/path/file.vortex";
671+
672+
// File has columns in order: c, b, a
673+
let batch = record_batch!(
674+
("c", Int32, vec![Some(300), Some(301), Some(302)]),
675+
("b", Int32, vec![Some(200), Some(201), Some(202)]),
676+
("a", Int32, vec![Some(100), Some(101), Some(102)])
677+
)
678+
.unwrap();
679+
let data_size = write_arrow_to_vortex(object_store.clone(), file_path, batch).await?;
680+
let file = PartitionedFile::new(file_path.to_string(), data_size);
681+
682+
// Table schema has columns in different order: a, b, c
683+
let table_schema = Arc::new(Schema::new(vec![
684+
Field::new("a", DataType::Int32, true),
685+
Field::new("b", DataType::Int32, true),
686+
Field::new("c", DataType::Int32, true),
687+
]));
688+
689+
let opener = VortexOpener {
690+
session: SESSION.clone(),
691+
object_store: object_store.clone(),
692+
projection: Some([0, 1, 2].into()),
693+
filter: None,
694+
file_pruning_predicate: None,
695+
expr_adapter_factory: Some(Arc::new(DefaultPhysicalExprAdapterFactory) as _),
696+
schema_adapter_factory: Arc::new(DefaultSchemaAdapterFactory),
697+
partition_fields: vec![],
698+
file_cache: VortexFileCache::new(1, 1, SESSION.clone()),
699+
logical_schema: table_schema.clone(),
700+
batch_size: 100,
701+
limit: None,
702+
metrics: Default::default(),
703+
layout_readers: Default::default(),
704+
has_output_ordering: false,
705+
};
706+
707+
// The opener should successfully open the file and reorder columns
708+
let stream = opener.open(make_meta(file_path, data_size), file)?.await?;
709+
710+
let format_opts = FormatOptions::new().with_types_info(true);
711+
let data = stream.try_collect::<Vec<_>>().await?;
712+
713+
// Verify the output has columns in table schema order (a, b, c)
714+
// not file order (c, b, a)
715+
assert_snapshot!(pretty_format_batches_with_options(&data, &format_opts)?.to_string(), @r"
716+
+-------+-------+-------+
717+
| a | b | c |
718+
| Int32 | Int32 | Int32 |
719+
+-------+-------+-------+
720+
| 100 | 200 | 300 |
721+
| 101 | 201 | 301 |
722+
| 102 | 202 | 302 |
723+
+-------+-------+-------+
724+
");
725+
726+
Ok(())
727+
}
728+
661729
#[tokio::test]
662730
// This test verifies that expression rewriting doesn't fail when there is
663731
// a nested schema mismatch between the physical file schema and logical

0 commit comments

Comments
 (0)