3 files changed: +10 -20 lines changed
Original file line number | Diff line number | Diff line change
107107
108108YAML_FIELDS_TO_CHECK = ["dataset_info" , "configs" , "viewer" , "language" ]
109109
110- USE_LIBVIEWER_FOR_DATASETS = False
110+ USE_LIBVIEWER_FOR_DATASETS = True
Original file line number Diff line number Diff line change @@ -150,7 +150,7 @@ impl Dataset {
150150 Some ( rows) => ( None , rows) ,
151151 None => {
152152 let metadata =
153- read_metadata ( self . metadata_store . clone ( ) , file. metadata_path . as_ref ( ) )
153+ read_metadata ( self . metadata_store . clone ( ) , file. metadata_path . as_ref ( ) , file . size )
154154 . await ?;
155155 let num_rows = metadata. file_metadata ( ) . num_rows ( ) as u64 ;
156156 ( Some ( metadata) , num_rows)
@@ -173,7 +173,7 @@ impl Dataset {
173173 let metadata = match metadata {
174174 Some ( meta) => meta,
175175 None => {
176- read_metadata ( self . metadata_store . clone ( ) , file. metadata_path . as_ref ( ) )
176+ read_metadata ( self . metadata_store . clone ( ) , file. metadata_path . as_ref ( ) , file . size )
177177 . await ?
178178 }
179179 } ;
@@ -199,7 +199,7 @@ impl Dataset {
199199 . unwrap_or ( & self . files )
200200 . iter ( )
201201 . map ( async move |file| {
202- let metadata = read_metadata ( self . data_store . clone ( ) , file. path . as_ref ( ) ) . await ?;
202+ let metadata = read_metadata ( self . data_store . clone ( ) , file. path . as_ref ( ) , file . size ) . await ?;
203203 write_metadata (
204204 metadata,
205205 self . metadata_store . clone ( ) ,
Original file line number Diff line number Diff line change @@ -75,6 +75,7 @@ impl<T: AsyncFileReader> AsyncFileReader for LimitedAsyncReader<T> {
7575pub async fn read_metadata (
7676 store : Arc < dyn ObjectStore > ,
7777 path : impl Into < Path > ,
78+ size : Option < u64 > ,
7879) -> Result < Arc < ParquetMetaData > > {
7980 let path = path. into ( ) ;
8081
@@ -84,22 +85,11 @@ pub async fn read_metadata(
8485 . with_offset_index_policy ( PageIndexPolicy :: Optional ) ;
8586 // .with_prefetch_hint(16 * 1024);
8687
87- // TODO(kszucs): if file_size is known then use load_and_finish
88- // let metadata = if let Some(file_size) = self.file_size {
89- // metadata.load_and_finish(self, file_size).await?
90- // } else {
91- // metadata.load_via_suffix_and_finish(self).await?
92- // };
93-
94- let metadata = metadata_reader
95- . load_via_suffix_and_finish ( & mut object_reader)
96- . await
97- . map_err ( |e| {
98- ParquetError :: General ( format ! (
99- "Failed to read metadata from path '{}' in store: {}: {}" ,
100- path, store, e
101- ) )
102- } ) ?;
88+ let metadata = if let Some ( file_size) = size {
89+ metadata_reader. load_and_finish ( & mut object_reader, file_size) . await ?
90+ } else {
91+ metadata_reader. load_via_suffix_and_finish ( & mut object_reader) . await ?
92+ } ;
10393
10494 Ok ( Arc :: new ( metadata) )
10595}
You can’t perform that action at this time.
0 commit comments