@@ -870,4 +870,81 @@ mod tests {
870870
871871 Ok ( ( ) )
872872 }
873+
874+ #[ tokio:: test]
875+ // Minimal reproduction of the schema projection bug.
876+ // Before the fix, this would fail with a cast error when the file schema
877+ // and table schema have different field orders and we project a subset of columns.
     //
     // Steps: write a one-row file with columns (a, b, c), open it through a
     // `VortexOpener` whose table schema is (c, a, b) with `b` declared as
     // Dictionary(UInt32, Utf8), project table-schema indices [0, 2], and
     // snapshot the resulting batches.
878+ async fn test_projection_bug_minimal_repro ( ) -> anyhow:: Result < ( ) > {
879+ let object_store = Arc :: new ( InMemory :: new ( ) ) as Arc < dyn ObjectStore > ;
880+ let file_path = "/path/file.vortex" ;
881+
882+ // The file has columns in order a, b, c with simple types (Int32, Utf8, Int32) and a single row.
883+ let batch = record_batch ! (
884+ ( "a" , Int32 , vec![ Some ( 1 ) ] ) ,
885+ ( "b" , Utf8 , vec![ Some ( "test" ) ] ) ,
886+ ( "c" , Int32 , vec![ Some ( 2 ) ] )
887+ )
888+ . unwrap ( ) ;
     // NOTE(review): `data_size` is presumably the size of the written file; it is
     // reused below for both `make_meta` and `PartitionedFile::new` — confirm.
889+ let data_size = write_arrow_to_vortex ( object_store. clone ( ) , file_path, batch) . await ?;
890+
891+ // Table schema has columns in a DIFFERENT order: c, a, b
892+ // and declares `b` with a different type that requires casting (Utf8 -> Dictionary(UInt32, Utf8)).
893+ let table_schema = Arc :: new ( Schema :: new ( vec ! [
894+ Field :: new( "c" , DataType :: Int32 , true ) ,
895+ Field :: new( "a" , DataType :: Int32 , true ) ,
896+ Field :: new(
897+ "b" ,
898+ DataType :: Dictionary ( Box :: new( DataType :: UInt32 ) , Box :: new( DataType :: Utf8 ) ) ,
899+ true ,
900+ ) ,
901+ ] ) ) ;
902+
903+ // Project indices [0, 2] of the table schema (not the file schema), which should give us: c, b
904+ // Before the fix, the schema adapter would get confused about which fields
905+ // to select from the file, causing incorrect type mappings.
906+ let projection = vec ! [ 0 , 2 ] ;
907+
     // Build the opener by hand: default expression/schema adapter factories,
     // no filter, no pruning predicate, no partition fields and no limit, so
     // only the projection and the schema differences are exercised.
908+ let opener = VortexOpener {
909+ session : SESSION . clone ( ) ,
910+ object_store : object_store. clone ( ) ,
911+ projection : Some ( projection. into ( ) ) ,
912+ filter : None ,
913+ file_pruning_predicate : None ,
914+ expr_adapter_factory : Some ( Arc :: new ( DefaultPhysicalExprAdapterFactory ) as _ ) ,
915+ schema_adapter_factory : Arc :: new ( DefaultSchemaAdapterFactory ) ,
916+ partition_fields : vec ! [ ] ,
     // NOTE(review): the two `1` arguments are presumably minimal cache size
     // limits — confirm against `VortexFileCache::new`.
917+ file_cache : VortexFileCache :: new ( 1 , 1 , SESSION . clone ( ) ) ,
918+ logical_schema : table_schema. clone ( ) ,
919+ batch_size : 100 ,
920+ limit : None ,
921+ metrics : Default :: default ( ) ,
922+ layout_readers : Default :: default ( ) ,
923+ has_output_ordering : false ,
924+ } ;
925+
926+ // This should succeed and return the correctly projected and cast data.
     // `open` is itself fallible and returns a future; awaiting it yields a
     // fallible sequence of batches that is collected into a `Vec`.
927+ let data = opener
928+ . open (
929+ make_meta ( file_path, data_size) ,
930+ PartitionedFile :: new ( file_path. to_string ( ) , data_size) ,
931+ ) ?
932+ . await ?
933+ . try_collect :: < Vec < _ > > ( )
934+ . await ?;
935+
936+ // Verify the columns are in the right order and have the right values.
     // The second snapshot row lists each column's data type (presumably enabled
     // by `with_types_info(true)`), so the snapshot also pins the cast of `b`
     // to Dictionary(UInt32, Utf8).
     // NOTE(review): the cell rows below are narrower than the `+---+` border
     // rows, so the column padding looks collapsed — confirm the literal matches
     // the formatter's aligned output.
937+ use datafusion:: arrow:: util:: pretty:: pretty_format_batches_with_options;
938+ let format_opts = FormatOptions :: new ( ) . with_types_info ( true ) ;
939+ assert_snapshot ! ( pretty_format_batches_with_options( & data, & format_opts) ?. to_string( ) , @r"
940+ +-------+--------------------------+
941+ | c | b |
942+ | Int32 | Dictionary(UInt32, Utf8) |
943+ +-------+--------------------------+
944+ | 2 | test |
945+ +-------+--------------------------+
946+ " ) ;
947+
948+ Ok ( ( ) )
949+ }
873950}
0 commit comments