99import codecs
1010from collections import defaultdict
1111from collections .abc import (
12+ Callable ,
1213 Hashable ,
1314 Mapping ,
1415 Sequence ,
1516)
1617import dataclasses
1718import functools
1819import gzip
20+ from importlib .metadata import entry_points
1921from io import (
2022 BufferedIOBase ,
2123 BytesIO ,
5153import warnings
5254import zipfile
5355
54- import pkg_resources
55-
5656from pandas ._typing import (
5757 BaseBuffer ,
5858 ReadCsvBuffer ,
@@ -1290,9 +1290,9 @@ def dedup_names(
12901290 return names
12911291
12921292
def _get_io_engine(name: str):
    """
    Return an I/O engine by its name.

    pandas I/O engines can be registered via entry points. The first time this
    function is called it will register all the entry points of the "pandas.io_engine"
    group and cache the loaded engines in the module-level ``_io_engines``
    dictionary. Later calls reuse that cache.

    If more than one installed package registers an engine under the same name,
    only the first entry point found is loaded. The names of the other packages
    are recorded on the engine, and a ``RuntimeWarning`` listing them is issued
    when that engine is requested.

    Parameters
    ----------
    name : str
        The engine name provided by the user in `engine=<value>`.

    Returns
    -------
    object
        The object loaded from the entry point registered as ``name``.

    Raises
    ------
    ValueError
        If no engine has been registered with the given name.

    Examples
    --------
    Assuming an installed package registers an engine named ``dummy`` in the
    "pandas.io_engine" entry point group, and that the engine provides a
    ``read_csv`` function, it can be used with:

    >>> _get_io_engine(name="dummy").read_csv("myfile.csv")  # doctest: +SKIP

    This is used internally to dispatch the next pandas call to the engine caller:

    >>> df = read_csv("myfile.csv", engine="dummy")  # doctest: +SKIP
    """
    global _io_engines

    if _io_engines is None:
        _io_engines = {}
        for entry_point in entry_points().select(group="pandas.io_engine"):
            package_name = entry_point.dist.metadata["Name"]
            if entry_point.name in _io_engines:
                # The name is already taken: keep the first engine and only
                # remember which other package tried to provide it.
                _io_engines[entry_point.name]._other_providers.append(package_name)
            else:
                _io_engines[entry_point.name] = entry_point.load()
                _io_engines[entry_point.name]._provider_name = package_name
                _io_engines[entry_point.name]._other_providers = []

    try:
        engine = _io_engines[name]
    except KeyError as err:
        raise ValueError(
            f"'{name}' is not a known engine. Some engines are only available "
            "after installing the package that provides them."
        ) from err

    if engine._other_providers:
        msg = (
            f"The engine '{name}' has been registered by the package "
            f"'{engine._provider_name}' and will be used. "
        )
        if len(engine._other_providers) == 1:
            msg += (
                f"The package '{engine._other_providers[0]}' also tried to register "
                "the engine, but it couldn't because it was already registered."
            )
        else:
            # str(list)[1:-1] drops the surrounding brackets, leaving the
            # quoted, comma-separated package names.
            msg += (
                "Other packages that tried to register the engine, but they couldn't "
                "because it was already registered are: "
                f"{str(engine._other_providers)[1:-1]}."
            )
        # warnings.warn takes the message first and the category second.
        warnings.warn(msg, RuntimeWarning, stacklevel=find_stack_level())

    return engine
13831376
13841377
1385- def allow_third_party_engines (skip_engines : list [str ] | None = None ):
1378+ def allow_third_party_engines (skip_engines : list [str ] | Callable | None = None ):
13861379 """
13871380 Decorator to avoid boilerplate code when allowing readers and writers to use
13881381 third-party engines.
@@ -1415,14 +1408,21 @@ def allow_third_party_engines(skip_engines: list[str] | None = None):
14151408 def decorator (func ):
14161409 @functools .wraps (func )
14171410 def wrapper (* args , ** kwargs ):
1418- if "engine" in kwargs and kwargs ["engine" ] not in skip_engines :
1419- format_name , is_writer = _extract_io_function_info (func .__name__ )
1420- engine_func = _engine_func (
1421- format_name = format_name ,
1422- engine_name = kwargs .pop ("engine" ),
1423- is_writer = is_writer ,
1424- )
1425- return engine_func (* args , ** kwargs )
1411+ if callable (skip_engines ):
1412+ skip_engine = False
1413+ else :
1414+ skip_engine = kwargs ["engine" ] in skip_engines
1415+
1416+ if "engine" in kwargs and not skip_engine :
1417+ engine_name = kwargs .pop ("engine" )
1418+ engine = _get_io_engine (engine_name )
1419+ try :
1420+ return getattr (engine , func .__name__ )(* args , ** kwargs )
1421+ except AttributeError as err :
1422+ raise ValueError (
1423+ f"The engine '{ engine_name } ' does not provide a "
1424+ f"'{ func .__name__ } ' function"
1425+ ) from err
14261426 else :
14271427 return func (* args , ** kwargs )
14281428
0 commit comments