@@ -37,7 +37,7 @@ class ParquetConfig(datasets.BuilderConfig):
3737 This is especially useful to configure buffering and caching.
3838
3939 <Added version="4.2.0"/>
40- on_bad_file (`Literal["error", "warn", "skip"]`, *optional*, defaults to "error")
40+ on_bad_files (`Literal["error", "warn", "skip"]`, *optional*, defaults to "error")
4141 Specify what to do upon encountering a bad file (a file that can't be read). Allowed values are:
4242 * 'error', raise an Exception when a bad file is encountered.
4343 * 'warn', raise a warning when a bad file is encountered and skip that file.
@@ -81,7 +81,7 @@ class ParquetConfig(datasets.BuilderConfig):
8181 features : Optional [datasets .Features ] = None
8282 filters : Optional [Union [ds .Expression , list [tuple ], list [list [tuple ]]]] = None
8383 fragment_scan_options : Optional [ds .ParquetFragmentScanOptions ] = None
84- on_bad_file : Literal ["error" , "warn" , "skip" ] = "error"
84+ on_bad_files : Literal ["error" , "warn" , "skip" ] = "error"
8585
8686 def __post_init__ (self ):
8787 super ().__post_init__ ()
@@ -122,10 +122,10 @@ def _split_generators(self, dl_manager):
122122 self .info .features = datasets .Features .from_arrow_schema (pq .read_schema (f ))
123123 break
124124 except pa .ArrowInvalid as e :
125- if self .config .on_bad_file == "error" :
125+ if self .config .on_bad_files == "error" :
126126 logger .error (f"Failed to read schema from '{ file } ' with error { type (e ).__name__ } : { e } " )
127127 raise
128- elif self .config .on_bad_file == "warn" :
128+ elif self .config .on_bad_files == "warn" :
129129 logger .warning (f"Skipping bad schema from '{ file } '. { type (e ).__name__ } : { e } `" )
130130 else :
131131 logger .debug (f"Skipping bad schema from '{ file } '. { type (e ).__name__ } : { e } `" )
@@ -180,10 +180,10 @@ def _generate_tables(self, files):
180180 # logger.warning('\n'.join(str(pa_table.slice(i, 1).to_pydict()) for i in range(pa_table.num_rows)))
181181 yield f"{ file_idx } _{ batch_idx } " , self ._cast_table (pa_table )
182182 except (pa .ArrowInvalid , ValueError ) as e :
183- if self .config .on_bad_file == "error" :
183+ if self .config .on_bad_files == "error" :
184184 logger .error (f"Failed to read file '{ file } ' with error { type (e ).__name__ } : { e } " )
185185 raise
186- elif self .config .on_bad_file == "warn" :
186+ elif self .config .on_bad_files == "warn" :
187187 logger .warning (f"Skipping bad file '{ file } '. { type (e ).__name__ } : { e } `" )
188188 else :
189189 logger .debug (f"Skipping bad file '{ file } '. { type (e ).__name__ } : { e } `" )
0 commit comments