@@ -340,19 +340,15 @@ def parquet_decoder(
     # pyarrow.Buffer when we have a memoryview to avoid creating intermediate
     # Python bytes objects.
     if isinstance(buffer, memoryview):
-        # pyarrow.py_buffer accepts buffer-protocol objects and is zero-copy
-        try:
-            pa_buf = pyarrow.py_buffer(buffer)
-            stream = pyarrow.BufferReader(pa_buf)
-        except Exception:
-            # fallback to MemoryViewStream if pyarrow can't handle this memoryview
-            stream = MemoryViewStream(buffer)
+        pa_buf = pyarrow.py_buffer(buffer)
+        stream = pyarrow.BufferReader(pa_buf)
     elif isinstance(buffer, bytes):
         stream = pyarrow.BufferReader(buffer)
     else:
         stream = pyarrow.input_stream(buffer)
 
-    parquet_file = parquet.ParquetFile(stream)
+    pq_meta = parquet.read_metadata(stream)
+    stream.seek(0)
 
     # we need to work out if we have a selection which may force us
     # to fetch columns just for filtering
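The hunk above swaps `parquet.ParquetFile(stream)` for `parquet.read_metadata(stream)` followed by a rewind. A minimal sketch of the pattern, assuming a local `example.parquet` file (the filename is illustrative); the explicit rewind matters because the same stream is reused by `parquet.read_table` later in the function:

```python
import pyarrow
import pyarrow.parquet as parquet

with open("example.parquet", "rb") as f:
    view = memoryview(f.read())

# py_buffer wraps the memoryview zero-copy; BufferReader gives a seekable stream
stream = pyarrow.BufferReader(pyarrow.py_buffer(view))

meta = parquet.read_metadata(stream)  # parses the footer, advancing the stream position
print(meta.num_rows, meta.num_columns, meta.num_row_groups)

stream.seek(0)  # rewind so a later parquet.read_table(stream) starts at byte 0
```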
@@ -366,7 +362,7 @@ def parquet_decoder(
         c.value for c in get_all_nodes_of_type(processed_selection, (NodeType.IDENTIFIER,))
     }
     selected_columns = list(
-        projection_set.union(filter_columns).intersection(parquet_file.schema_arrow.names)
+        projection_set.union(filter_columns).intersection(pq_meta.schema.names)
     )
 
     # Read all columns if none are selected, unless force_read is set
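The pruning rule above reads only columns that are either projected or referenced by a filter, and that actually exist in the file. A small illustrative sketch (all names hypothetical; `file_columns` stands in for `pq_meta.schema.names`):

```python
projection_set = {"name", "age"}      # columns the query selects
filter_columns = {"age", "city"}      # columns referenced by predicates
file_columns = ["id", "name", "age"]  # stand-in for pq_meta.schema.names

selected_columns = list(
    projection_set.union(filter_columns).intersection(file_columns)
)
# e.g. ["name", "age"] -- "city" is dropped because the file doesn't have it;
# set operations mean the resulting order is not guaranteed
```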
@@ -376,18 +372,18 @@ def parquet_decoder(
     # get the full data size of the file to see how effective projection/selection is
     uncompressed_size = sum(
         row_group.column(j).total_uncompressed_size
-        for i in range(parquet_file.metadata.num_row_groups)
-        for row_group in [parquet_file.metadata.row_group(i)]
+        for i in range(pq_meta.num_row_groups)
+        for row_group in [pq_meta.row_group(i)]
         for j in range(row_group.num_columns)
     )
 
     # If it's COUNT(*), we don't need to create a full dataset
     # We have a handler later to sum up the $COUNT(*) column
     if projection == [] and selection == []:
-        table = pyarrow.Table.from_arrays([[parquet_file.metadata.num_rows]], names=["$COUNT(*)"])
+        table = pyarrow.Table.from_arrays([[pq_meta.num_rows]], names=["$COUNT(*)"])
         return (
-            parquet_file.metadata.num_rows,
-            parquet_file.metadata.num_columns,
+            pq_meta.num_rows,
+            pq_meta.num_columns,
             uncompressed_size,
             table,
         )
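A minimal sketch of the COUNT(*) fast path above: a one-cell table carries the row count straight from the footer metadata, so no column data is ever decoded (`num_rows` here stands in for `pq_meta.num_rows`):

```python
import pyarrow

num_rows = 1_000_000  # stand-in for pq_meta.num_rows
table = pyarrow.Table.from_arrays([[num_rows]], names=["$COUNT(*)"])
# a later handler sums the $COUNT(*) column instead of counting actual rows
```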
@@ -400,16 +396,15 @@ def parquet_decoder(
         filters=dnf_filter,
         use_threads=use_threads,
         use_pandas_metadata=False,
-        schema=parquet_file.schema_arrow,
     )
 
     # Any filters we couldn't push down to PyArrow are applied here
     if processed_selection:
         table = filter_records(processed_selection, table)
 
     return (
-        parquet_file.metadata.num_rows,
-        parquet_file.metadata.num_columns,
+        pq_meta.num_rows,
+        pq_meta.num_columns,
         uncompressed_size,
         table,
     )
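For context on the pushdown in this last hunk: `parquet.read_table` accepts filters in disjunctive normal form, a list of AND-groups that are OR-ed together. A hedged example with an illustrative path, columns, and predicate (note the hunk also drops the explicit `schema=` argument, letting the schema be inferred from the file):

```python
import pyarrow.parquet as parquet

# (age >= 18) AND (country == "NZ") -- one AND-group, so no OR branch
dnf_filter = [[("age", ">=", 18), ("country", "==", "NZ")]]

table = parquet.read_table(
    "example.parquet",
    columns=["name", "age"],
    filters=dnf_filter,
    use_threads=True,
    use_pandas_metadata=False,
)
```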