55
66from collections .abc import Mapping , Sized
77from typing import cast
8- import warnings
98
109import pandas as pd
1110from pandas import DataFrame
@@ -269,9 +268,9 @@ def _assign_variables(
269268
270269def handle_data_source (data : object ) -> pd .DataFrame | Mapping | None :
271270 """Convert the data source object to a common union representation."""
272- if isinstance (data , pd .DataFrame ) or hasattr (data , "__dataframe__ " ):
271+ if isinstance (data , pd .DataFrame ) or hasattr (data , "__arrow_c_stream__ " ):
273272 # Check for pd.DataFrame inheritance could be removed once
274- # minimal pandas version supports dataframe interchange (1.5.0 ).
273+ # minimal pandas version supports PyCapsule Interface (2.2 ).
275274 data = convert_dataframe_to_pandas (data )
276275 elif data is not None and not isinstance (data , Mapping ):
277276 err = f"Data source must be a DataFrame or Mapping, not { type (data )!r} ."
@@ -285,35 +284,32 @@ def convert_dataframe_to_pandas(data: object) -> pd.DataFrame:
285284 if isinstance (data , pd .DataFrame ):
286285 return data
287286
288- if not hasattr (pd .api , "interchange" ):
289- msg = (
290- "Support for non-pandas DataFrame objects requires a version of pandas "
291- "that implements the DataFrame interchange protocol. Please upgrade "
292- "your pandas version or coerce your data to pandas before passing "
293- "it to seaborn."
294- )
295- raise TypeError (msg )
296-
297- if _version_predates (pd , "2.0.2" ):
298- msg = (
299- "DataFrame interchange with pandas<2.0.2 has some known issues. "
300- f"You are using pandas { pd .__version__ } . "
301- "Continuing, but it is recommended to carefully inspect the results and to "
302- "consider upgrading."
303- )
304- warnings .warn (msg , stacklevel = 2 )
305-
306- try :
307- # This is going to convert all columns in the input dataframe, even though
308- # we may only need one or two of them. It would be more efficient to select
309- # the columns that are going to be used in the plot prior to interchange.
310- # Solving that in general is a hard problem, especially with the objects
311- # interface where variables passed in Plot() may only be referenced later
312- # in Plot.add(). But noting here in case this seems to be a bottleneck.
313- return pd .api .interchange .from_dataframe (data )
314- except Exception as err :
315- msg = (
316- "Encountered an exception when converting data source "
317- "to a pandas DataFrame. See traceback above for details."
318- )
319- raise RuntimeError (msg ) from err
287+ if hasattr (data , '__arrow_c_stream__' ):
288+ try :
289+ import pyarrow
290+ except ImportError as err :
291+ msg = "PyArrow is required for non-pandas Dataframe support."
292+ raise RuntimeError (msg ) from err
293+ if _version_predates (pyarrow , '14.0.0' ):
294+ msg = "PyArrow>=14.0.0 is required for non-pandas Dataframe support."
295+ raise RuntimeError (msg )
296+ try :
297+ # This is going to convert all columns in the input dataframe, even though
298+ # we may only need one or two of them. It would be more efficient to select
299+ # the columns that are going to be used in the plot prior to interchange.
300+ # Solving that in general is a hard problem, especially with the objects
301+ # interface where variables passed in Plot() may only be referenced later
302+ # in Plot.add(). But noting here in case this seems to be a bottleneck.
303+ return pyarrow .table (data ).to_pandas ()
304+ except Exception as err :
305+ msg = (
306+ "Encountered an exception when converting data source "
307+ "to a pandas DataFrame. See traceback above for details."
308+ )
309+ raise RuntimeError (msg ) from err
310+
311+ msg = (
312+ "Expected object which implements '__arrow_c_stream__' from the "
313+ f"PyCapsule Interface, got: { type (data )} "
314+ )
315+ raise TypeError (msg )
0 commit comments