3333_logger : logging .Logger = logging .getLogger (__name__ )
3434
3535
36+ def _get_version_id_for (version_id : Optional [Union [str , Dict [str , str ]]], path : str ) -> Optional [str ]:
37+ if isinstance (version_id , dict ):
38+ return version_id .get (path , None )
39+
40+ return version_id
41+
42+
3643def _get_read_details (path : str , pandas_kwargs : Dict [str , Any ]) -> Tuple [str , Optional [str ], Optional [str ]]:
3744 if pandas_kwargs .get ("compression" , "infer" ) == "infer" :
3845 pandas_kwargs ["compression" ] = infer_compression (path , compression = "infer" )
@@ -52,7 +59,7 @@ def _read_text_chunked(
5259 s3_additional_kwargs : Optional [Dict [str , str ]],
5360 dataset : bool ,
5461 use_threads : Union [bool , int ],
55- version_ids : Optional [Dict [str , str ]] = None ,
62+ version_ids : Optional [Dict [str , Optional [ str ] ]] = None ,
5663) -> Iterator [pd .DataFrame ]:
5764 for path in paths :
5865 _logger .debug ("path: %s" , path )
@@ -157,19 +164,21 @@ def _read_text(
157164 }
158165 _logger .debug ("args:\n %s" , pprint .pformat (args ))
159166
160- if chunksize is not None :
161- return _read_text_chunked (
162- paths = paths , version_ids = version_id if isinstance ( version_id , dict ) else None , chunksize = chunksize , ** args
167+ if len ( paths ) > 1 and version_id is not None and not isinstance ( version_id , dict ) :
168+ raise exceptions . InvalidArgumentCombination (
169+ "If multiple paths are provided along with a file version ID, the version ID parameter must be a dict."
163170 )
171+ version_id_dict = {path : _get_version_id_for (version_id , path ) for path in paths }
164172
165- version_id = version_id if isinstance (version_id , dict ) else None
173+ if chunksize is not None :
174+ return _read_text_chunked (paths = paths , version_ids = version_id_dict , chunksize = chunksize , ** args )
166175
167176 executor = _get_executor (use_threads = use_threads )
168177 tables = executor .map (
169178 _read_text_file ,
170179 session ,
171180 paths ,
172- itertools . repeat ( version_id ) ,
181+ [ version_id_dict [ path ] for path in paths ] ,
173182 itertools .repeat (parser_func ),
174183 itertools .repeat (path_root ),
175184 itertools .repeat (pandas_kwargs ),
0 commit comments