@@ -8,10 +8,7 @@
 given as a folder on local disk
 """
 
-import contextlib
-import mmap
 import os
-import platform
 import time
 from typing import Dict
 from typing import List
@@ -34,17 +31,6 @@
 from opteryx.utils.file_decoders import get_decoder
 
 OS_SEP = os.sep
-IS_LINUX = platform.system() == "Linux"
-
-
-# prefer MAP_PRIVATE and on Linux enable MAP_POPULATE to fault pages in
-flags = mmap.MAP_PRIVATE
-if IS_LINUX:
-    with contextlib.suppress(Exception):
-        flags |= getattr(mmap, "MAP_POPULATE", 0)
-mmap_config = {}
-mmap_config["flags"] = flags
-mmap_config["prot"] = mmap.PROT_READ
 
 
 class DiskConnector(BaseConnector, Partitionable, PredicatePushable, LimitPushable, Statistics):
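
For context, the deleted module-level block built a read-only, private mapping configuration, opting into MAP_POPULATE on Linux so pages are faulted in eagerly rather than on first access. A minimal stdlib sketch of the same idea (map_readonly is a hypothetical helper for illustration, not part of Opteryx):

    import contextlib
    import mmap
    import os
    import platform

    # MAP_PRIVATE always; on Linux also MAP_POPULATE so the kernel pre-faults
    # pages instead of paying a page fault on first access.
    flags = mmap.MAP_PRIVATE
    if platform.system() == "Linux":
        with contextlib.suppress(Exception):
            flags |= getattr(mmap, "MAP_POPULATE", 0)

    def map_readonly(path: str) -> mmap.mmap:
        fd = os.open(path, os.O_RDONLY)
        try:
            size = os.fstat(fd).st_size
            return mmap.mmap(fd, size, flags=flags, prot=mmap.PROT_READ)
        finally:
            os.close(fd)  # the mapping stays valid after the fd closes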
@@ -128,7 +114,7 @@ def read_blob(
                 If an I/O error occurs while reading the file.
         """
         from opteryx.compiled.io.disk_reader import read_file_mmap
-        from opteryx.compiled.io.disk_reader import unmap_memory
+        # from opteryx.compiled.io.disk_reader import unmap_memory
 
         # Read using mmap for maximum speed
         mmap_obj = read_file_mmap(blob_name)
@@ -157,7 +143,8 @@ def read_blob(
             return result
         finally:
             # CRITICAL: Clean up the memory mapping
-            unmap_memory(mmap_obj)
+            pass
+            # unmap_memory(mmap_obj)
 
     @single_item_cache
     def get_list_of_blob_names(self, *, prefix: str) -> List[str]:
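
The compiled read_file_mmap and unmap_memory helpers live in Cython; the acquire/use/release pattern they implement looks like this with the stdlib mmap module (a sketch, reusing the hypothetical map_readonly helper above):

    def read_blob_bytes(path: str) -> bytes:
        mm = map_readonly(path)
        try:
            return bytes(mm)  # copy the data out while the mapping is alive
        finally:
            mm.close()  # unmap; skipping this leaks address space per blob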
@@ -219,7 +206,7 @@ def read_dataset(
             decoder = get_decoder(blob_name)
             try:
                 if not just_schema:
-                    num_rows, _, raw_bytes, decoded = self.read_blob(
+                    num_rows, _, raw_size, decoded = self.read_blob(
                         blob_name=blob_name,
                         decoder=decoder,
                         just_schema=False,
@@ -234,8 +221,8 @@ def read_dataset(
 
                     self.statistics.rows_seen += num_rows
                     self.rows_seen += num_rows
-                    self.statistics.bytes_raw += raw_bytes
                     self.blobs_seen += 1
+                    self.statistics.bytes_raw += raw_size
                     yield decoded
 
                     # if we have read all the rows we need to stop
@@ -247,14 +234,9 @@ def read_dataset(
                         decoder=decoder,
                         just_schema=True,
                     )
-                    # Some decoders may return None for schema (e.g. unreadable
-                    # or undecidable schema). Skip those and continue with the
-                    # next blob instead of trying to access attributes on None.
-                    if schema is None:
-                        continue
                     # if we have more than one blob we need to estimate the row count
                     blob_count = len(blob_names)
-                    if getattr(schema, "row_count_metric", None) and blob_count > 1:
+                    if schema.row_count_metric and blob_count > 1:
                         schema.row_count_estimate = schema.row_count_metric * blob_count
                         schema.row_count_metric = None
                         self.statistics.estimated_row_count += schema.row_count_estimate
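
The estimation step takes the exact row count observed for the sampled blob and scales it by the number of blobs, e.g. a metric of 10,000 rows across 12 blobs yields an estimate of 120,000. A standalone sketch of the arithmetic (hypothetical function name):

    def estimate_dataset_rows(row_count_metric: int, blob_count: int) -> int:
        # Assumes blobs are roughly uniform in size, so one blob's exact
        # count, multiplied by the blob count, approximates the dataset.
        return row_count_metric * blob_count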