@@ -2,7 +2,7 @@ use std::fs::File;
22use std:: ops:: Range ;
33use std:: path:: { Path , PathBuf } ;
44use std:: process:: Command ;
5- use std:: sync:: { Arc , LazyLock } ;
5+ use std:: sync:: Arc ;
66
77use arrow_array:: types:: Int64Type ;
88use arrow_array:: {
@@ -24,18 +24,16 @@ use stream::StreamExt;
2424use vortex:: aliases:: hash_map:: HashMap ;
2525use vortex:: array:: ChunkedArray ;
2626use vortex:: arrow:: FromArrowType ;
27- use vortex:: buffer:: Buffer ;
2827use vortex:: compress:: CompressionStrategy ;
2928use vortex:: dtype:: DType ;
3029use vortex:: error:: VortexResult ;
31- use vortex:: file:: { LayoutContext , LayoutDeserializer , VortexFileWriter , VortexReadBuilder } ;
32- use vortex:: io:: { IoDispatcher , ObjectStoreReadAt , TokioFile , VortexReadAt , VortexWrite } ;
30+ use vortex:: file:: v2 :: { VortexOpenOptions , VortexWriteOptions } ;
31+ use vortex:: io:: { ObjectStoreReadAt , TokioFile , VortexReadAt , VortexWrite } ;
3332use vortex:: sampling_compressor:: { SamplingCompressor , ALL_ENCODINGS_CONTEXT } ;
33+ use vortex:: scan:: Scan ;
34+ use vortex:: stream:: ArrayStreamExt ;
3435use vortex:: { ArrayData , IntoArrayData , IntoCanonical } ;
3536
36- static DISPATCHER : LazyLock < Arc < IoDispatcher > > =
37- LazyLock :: new ( || Arc :: new ( IoDispatcher :: default ( ) ) ) ;
38-
3937pub const BATCH_SIZE : usize = 65_536 ;
4038
4139#[ derive( Debug , Clone , Serialize , Deserialize ) ]
@@ -48,19 +46,12 @@ pub struct VortexFooter {
4846pub async fn open_vortex ( path : & Path ) -> VortexResult < ArrayData > {
4947 let file = TokioFile :: open ( path) . unwrap ( ) ;
5048
51- VortexReadBuilder :: new (
52- file,
53- LayoutDeserializer :: new (
54- ALL_ENCODINGS_CONTEXT . clone ( ) ,
55- LayoutContext :: default ( ) . into ( ) ,
56- ) ,
57- )
58- . with_io_dispatcher ( DISPATCHER . clone ( ) )
59- . build ( )
60- . await ?
61- . into_stream ( )
62- . read_all ( )
63- . await
49+ VortexOpenOptions :: new ( ALL_ENCODINGS_CONTEXT . clone ( ) )
50+ . open ( file)
51+ . await ?
52+ . scan ( Scan :: all ( ) ) ?
53+ . into_array_data ( )
54+ . await
6455}
6556
6657pub async fn rewrite_parquet_as_vortex < W : VortexWrite > (
@@ -69,11 +60,10 @@ pub async fn rewrite_parquet_as_vortex<W: VortexWrite>(
6960) -> VortexResult < ( ) > {
7061 let chunked = compress_parquet_to_vortex ( parquet_path. as_path ( ) ) ?;
7162
72- VortexFileWriter :: new ( write)
73- . write_array_columns ( chunked)
74- . await ?
75- . finalize ( )
63+ VortexWriteOptions :: default ( )
64+ . write ( write, chunked. into_array_stream ( ) )
7665 . await ?;
66+
7767 Ok ( ( ) )
7868}
7969
@@ -116,25 +106,19 @@ pub fn write_csv_as_parquet(csv_path: PathBuf, output_path: &Path) -> VortexResu
116106
117107async fn take_vortex < T : VortexReadAt + Unpin + ' static > (
118108 reader : T ,
119- indices : & [ u64 ] ,
109+ _indices : & [ u64 ] ,
120110) -> VortexResult < ArrayData > {
121- VortexReadBuilder :: new (
122- reader,
123- LayoutDeserializer :: new (
124- ALL_ENCODINGS_CONTEXT . clone ( ) ,
125- LayoutContext :: default ( ) . into ( ) ,
126- ) ,
127- )
128- . with_io_dispatcher ( DISPATCHER . clone ( ) )
129- . with_indices ( Buffer :: copy_from ( indices) . into_array ( ) )
130- . build ( )
131- . await ?
132- . into_stream ( )
133- . read_all ( )
134- . await
135- // For equivalence.... we decompress to make sure we're not cheating too much.
136- . and_then ( IntoCanonical :: into_canonical)
137- . map ( ArrayData :: from)
111+ VortexOpenOptions :: new ( ALL_ENCODINGS_CONTEXT . clone ( ) )
112+ . open ( reader)
113+ . await ?
114+ // FIXME(ngates): support row indices
115+ // .scan_rows(Scan::all(), indices.iter().copied())?
116+ . scan ( Scan :: all ( ) ) ?
117+ . into_array_data ( )
118+ . await ?
119+ // For equivalence.... we decompress to make sure we're not cheating too much.
120+ . into_canonical ( )
121+ . map ( ArrayData :: from)
138122}
139123
140124pub async fn take_vortex_object_store (
0 commit comments