@@ -269,9 +269,9 @@ def _assign_variables(
269269
270270def handle_data_source (data : object ) -> pd .DataFrame | Mapping | None :
271271 """Convert the data source object to a common union representation."""
272- if isinstance (data , pd .DataFrame ) or hasattr (data , "__dataframe__ " ):
272+ if isinstance (data , pd .DataFrame ) or hasattr (data , "__arrow_c_stream__ " ):
273273 # Check for pd.DataFrame inheritance could be removed once
274- # minimal pandas version supports dataframe interchange (1.5.0 ).
274+ # the minimum supported pandas version implements the Arrow PyCapsule Interface (2.2).
275275 data = convert_dataframe_to_pandas (data )
276276 elif data is not None and not isinstance (data , Mapping ):
277277 err = f"Data source must be a DataFrame or Mapping, not { type (data )!r} ."
@@ -285,35 +285,29 @@ def convert_dataframe_to_pandas(data: object) -> pd.DataFrame:
285285 if isinstance (data , pd .DataFrame ):
286286 return data
287287
288- if not hasattr (pd .api , "interchange" ):
289- msg = (
290- "Support for non-pandas DataFrame objects requires a version of pandas "
291- "that implements the DataFrame interchange protocol. Please upgrade "
292- "your pandas version or coerce your data to pandas before passing "
293- "it to seaborn."
294- )
295- raise TypeError (msg )
296-
297- if _version_predates (pd , "2.0.2" ):
298- msg = (
299- "DataFrame interchange with pandas<2.0.2 has some known issues. "
300- f"You are using pandas { pd .__version__ } . "
301- "Continuing, but it is recommended to carefully inspect the results and to "
302- "consider upgrading."
303- )
304- warnings .warn (msg , stacklevel = 2 )
305-
306- try :
307- # This is going to convert all columns in the input dataframe, even though
308- # we may only need one or two of them. It would be more efficient to select
309- # the columns that are going to be used in the plot prior to interchange.
310- # Solving that in general is a hard problem, especially with the objects
311- # interface where variables passed in Plot() may only be referenced later
312- # in Plot.add(). But noting here in case this seems to be a bottleneck.
313- return pd .api .interchange .from_dataframe (data )
314- except Exception as err :
315- msg = (
316- "Encountered an exception when converting data source "
317- "to a pandas DataFrame. See traceback above for details."
318- )
319- raise RuntimeError (msg ) from err
288+ if hasattr (data , '__arrow_c_stream__' ):
289+ try :
290+ import pyarrow
291+ except ImportError as err :
292+ msg = "PyArrow is required for non-pandas Dataframe support."
293+ raise RuntimeError (msg ) from err
294+ if _version_predates (pyarrow , '14.0.0' ):
295+ msg = "PyArrow>=14.0.0 is required for non-pandas Dataframe support."
296+ raise RuntimeError (msg )
297+ try :
298+ # This is going to convert all columns in the input dataframe, even though
299+ # we may only need one or two of them. It would be more efficient to select
300+ # the columns that are going to be used in the plot prior to conversion.
301+ # Solving that in general is a hard problem, especially with the objects
302+ # interface where variables passed in Plot() may only be referenced later
303+ # in Plot.add(). But noting here in case this seems to be a bottleneck.
304+ return pyarrow .table (data ).to_pandas ()
305+ except Exception as err :
306+ msg = (
307+ "Encountered an exception when converting data source "
308+ "to a pandas DataFrame. See traceback above for details."
309+ )
310+ raise RuntimeError (msg ) from err
311+
312+ msg = f"Expected object which implements '__arrow_c_stream__' from the PyCapsule Interface, got: { type (data )} "
313+ raise TypeError (msg )
0 commit comments