diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 307cc6cab..3bd64a2fe 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -113,7 +113,8 @@ jobs: steps: - uses: holoviz-dev/holoviz_tasks/pixi_lock@v0 with: - cache: ${{ github.event.inputs.cache == 'true' || github.event.inputs.cache == '' }} + cache: false # NOTE: REVERT + # cache: ${{ github.event.inputs.cache == 'true' || github.event.inputs.cache == '' }} unit_test_suite: name: unit:${{ matrix.environment }}:${{ matrix.os }} diff --git a/hvplot/converter.py b/hvplot/converter.py index 361b55da5..13c6d1021 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -8,6 +8,7 @@ import pandas as pd import numpy as np import colorcet as cc +import narwhals as nw from bokeh.models import HoverTool from holoviews.core.dimension import Dimension @@ -55,6 +56,7 @@ _HV_GE_1_21_0, _Undefined, filter_opts, + is_narwhals, is_tabular, is_series, is_dask, @@ -73,7 +75,6 @@ relabel_redim, redim_, support_index, - check_library, is_geodataframe, process_derived_datetime_xarray, process_derived_datetime_pandas, @@ -1422,6 +1423,9 @@ def _process_data( elif isinstance(data, pd.DataFrame): datatype = 'pandas' self.data = data + elif isinstance(data, (nw.LazyFrame, nw.DataFrame, nw.Series)): + datatype = 'narwhals' + self.data = data elif is_dask(data): datatype = 'dask' self.data = data.persist() if persist else data @@ -2638,19 +2642,29 @@ def _category_plot(self, element, x: str, y: list[str], data): cur_opts, compat_opts = self._get_compat_opts(element.name, labelled=labelled) - id_vars = [x] - if any(v in self.indexes for v in id_vars): - # Calling reset_index() is required since id_vars from melt - # only accepts column names, not index names. 
- data = data.reset_index() - data = data[y + [x]] - - if check_library(data, 'dask'): - from dask.dataframe import melt - else: - melt = pd.melt + # id_vars = [x] + # if any(v in self.indexes for v in id_vars): + # # Calling reset_index() is required since id_vars from melt + # # only accepts column names, not index names. + # data = data.reset_index() + # if x is "index": + + # + # if check_library(data, 'dask'): + # from dask.dataframe import melt + # else: + # melt = pd.melt + + # df = melt(data, id_vars=[x], var_name=self.group_label, value_name=self.value_label) + + data = nw.from_native(data) + data = data.select([*y, x]) + df = data.unpivot( + index=x, + variable_name=self.group_label, + value_name=self.value_label, + ).to_native() - df = melt(data, id_vars=[x], var_name=self.group_label, value_name=self.value_label) kdims = [x, self.group_label] vdims = [self.value_label] + self.hover_cols if self.subplots: @@ -2707,14 +2721,23 @@ def _stats_plot(self, element, y, data=None): cur_opts['labelled'] = labelled kdims = [self.group_label] - data = data[list(y)] - if check_library(data, 'dask'): - from dask.dataframe import melt - else: - melt = pd.melt - df = melt(data, var_name=self.group_label, value_name=self.value_label) - if list(y) and df[self.value_label].dtype is not data[y[0]].dtype: - df[self.value_label] = df[self.value_label].astype(data[y[0]].dtype) + # if check_library(data, 'narwhals'): + # data = data.lazy().collect().to_pandas() # HACK: Should likely be handled more elegant + # data = data[list(y)] + # if check_library(data, 'dask'): + # from dask.dataframe import melt + # else: + # melt = pd.melt + # df = melt(data, var_name=self.group_label, value_name=self.value_label) + data = nw.from_native(data) + data = data.select(list(y)) + df = data.unpivot( + index=x, + variable_name=self.group_label, + value_name=self.value_label, + ).to_native() + # if list(y) and df[self.value_label].dtype is not data[y[0]].dtype: + # df[self.value_label] = 
df[self.value_label].astype(data[y[0]].dtype) redim = self._merge_redim({self.value_label: ylim}) return relabel_redim( element(df, kdims, self.value_label), @@ -2794,13 +2817,11 @@ def hist(self, x=None, y=None, data=None): if hist_opts['bin_range'] is None and not self._norm_opts.get('axiswise'): ranges = [] + nw_data = nw.from_native(data) + for col in y: - ys = data[col] - ymin, ymax = (ys.min(), ys.max()) - if is_dask(ys): - ymin, ymax = ymin.compute(), ymax.compute() - elif is_ibis(ys): - ymin, ymax = ymin.execute(), ymax.execute() + ymin = nw_data.select(nw.col(col).min()).lazy().collect().item() + ymax = nw_data.select(nw.col(col).max()).lazy().collect().item() ranges.append((ymin, ymax)) if ranges: hist_opts['bin_range'] = max_range(ranges) @@ -2855,10 +2876,17 @@ def kde(self, x=None, y=None, data=None): dists = Distribution(data, y, []) else: ranges = {self.value_label: xlim} - data = data[y] - df = data.melt(var_name=self.group_label, value_name=self.value_label) + data = nw.from_native(data) + data = data.select([x, *y]) + df = data.unpivot( + index=x, + variable_name=self.group_label, + value_name=self.value_label, + ).to_native() + # data = data[y] + # df = data.melt(var_name=self.group_label, value_name=self.value_label) ds = Dataset(df) - if len(df): + if len(ds): dists = ds.to(Distribution, self.value_label) dists = dists.layout() if self.subplots else dists.overlay(sort=False) else: @@ -2946,15 +2974,20 @@ def bivariate(self, x=None, y=None, data=None): ) def ohlc(self, x=None, y=None, data=None): + from holoviews.core.util import dtype_kind + self._error_if_unavailable('ohlc', Rectangles) self._error_if_unavailable('ohlc', Segments) data = self.data if data is None else data if x is None: variables = [var for var in self.variables if var not in self.indexes] - if data[variables[0]].dtype.kind == 'M': + dtype = nw.from_native(data).schema[variables[0]] + if dtype_kind(dtype) == 'M': x = variables[0] - else: + elif self.indexes: x = 
self.indexes[0] + else: + x = 'index' width = self.kwds.get('bar_width', 0.5) if y is None: o, h, l, c = [col for col in data.columns if col != x][:4] # noqa: E741 @@ -2967,7 +3000,7 @@ def ohlc(self, x=None, y=None, data=None): self.hover_cols.remove(x) vdims = list(dict.fromkeys(ohlc_cols + self.hover_cols)) ds = Dataset(data, [x], vdims) - if ds.dimension_values(x).dtype.kind in 'SUO': + if dtype_kind(ds.dimension_values(x)) in 'SUO': rects = Rectangles(ds, [x, o, x, c]) else: if len(ds): @@ -2983,8 +3016,9 @@ def ohlc(self, x=None, y=None, data=None): seg_cur_opts, seg_compat_opts = self._get_compat_opts('Segments') tools = seg_cur_opts.pop('tools', []) if 'hover' in tools: - x_data = data[x] if x in data.columns else data.index - if pd.api.types.is_datetime64_any_dtype(x_data): + # x_data = data[x] if x in data.columns else data.index + dtype = nw.from_native(data).schema[x] + if dtype_kind(dtype) == 'M': # %F %T: strftime code for %Y-%m-%d %H:%M:%S. # See https://man7.org/linux/man-pages/man3/strftime.3.html x_tooltip = f'@{x}{{%F %T}}' @@ -3020,7 +3054,7 @@ def ohlc(self, x=None, y=None, data=None): def table(self, x=None, y=None, data=None): self._error_if_unavailable('table') data = self.data if data is None else data - if isinstance(data.index, (DatetimeIndex, MultiIndex)): + if isinstance(getattr(data, 'index', None), (DatetimeIndex, MultiIndex)): # To get the index displayed in the table as Bokeh doesn't show it. 
data = data.reset_index() @@ -3260,6 +3294,8 @@ def _geom_plot(self, x=None, y=None, data=None, kind='polygons'): data, x, y, _ = self._process_gridded_args(data, x, y, z=None) params = dict(self._relabel) + if is_narwhals(self.data) and x == 'index': + x = None if not (x and y): if is_geodataframe(data): x, y = ('Longitude', 'Latitude') if self.geo else ('x', 'y') diff --git a/hvplot/plotting/core.py b/hvplot/plotting/core.py index 096ef1cb1..54061e286 100644 --- a/hvplot/plotting/core.py +++ b/hvplot/plotting/core.py @@ -1600,15 +1600,21 @@ def _get_converter(self, x=None, y=None, kind=None, **kwds): # Reorder the columns as in the data. columns = sorted(columns, key=lambda c: column_names.index(c)) - if isinstance(self._data, pl.DataFrame): - data = self._data.select(columns).to_pandas() + if isinstance(self._data, (pl.LazyFrame, pl.DataFrame)): + data = self._data.select(columns) # .to_pandas() elif isinstance(self._data, pl.Series): - data = self._data.to_pandas() - elif isinstance(self._data, pl.LazyFrame): - data = self._data.select(columns).collect().to_pandas() + data = self._data.to_frame() # .to_pandas() + # elif isinstance(self._data, pl.LazyFrame): + # data = self._data.select(columns).collect()# .to_pandas() else: raise ValueError('Only Polars DataFrame, Series, and LazyFrame are supported') + x = x or 'index' + if x == 'index': + data = data.with_row_index() + import narwhals as nw + + data = nw.from_native(data) return HoloViewsConverter(data, x, y, kind=kind, **params) diff --git a/hvplot/util.py b/hvplot/util.py index 15a0f8f9c..5a338c3aa 100644 --- a/hvplot/util.py +++ b/hvplot/util.py @@ -426,7 +426,7 @@ def is_list_like(obj): def is_tabular(data): - if check_library(data, ['dask', 'streamz', 'pandas', 'geopandas', 'cudf']): + if check_library(data, ['dask', 'streamz', 'pandas', 'geopandas', 'cudf', 'narwhals']): return True elif check_library(data, 'intake'): from intake.source.base import DataSource @@ -438,7 +438,7 @@ def is_tabular(data): 
def is_series(data): - if not check_library(data, ['dask', 'streamz', 'pandas', 'cudf']): + if not check_library(data, ['dask', 'streamz', 'pandas', 'cudf', 'narwhals']): return False elif isinstance(data, pd.Series): return True @@ -454,6 +454,10 @@ def is_series(data): import cudf return isinstance(data, cudf.Series) + elif check_library(data, 'narwhals'): + import narwhals as nw + + return isinstance(data, nw.Series) else: return False @@ -487,6 +491,14 @@ def is_duckdb(data): return isinstance(data, (duckdb.DuckDBPyRelation, duckdb.DuckDBPyConnection)) +def is_narwhals(data): + if not check_library(data, 'narwhals'): + return False + import narwhals as nw + + return isinstance(data, (nw.DataFrame, nw.Series, nw.LazyFrame)) + + def is_polars(data): if not check_library(data, 'polars'): return False @@ -530,7 +542,7 @@ def is_xarray(data): def is_lazy_data(data): """Check if data is lazy - This checks if the datatype is Dask, Ibis, or Polars' LazyFrame. + This checks if the datatype is Dask, Ibis, Polars' LazyFrame, or Narwhals' LazyFrame. It is useful to avoid eager evaluation of the data. 
""" if is_dask(data) or is_ibis(data): @@ -539,6 +551,10 @@ def is_lazy_data(data): import polars as pl return isinstance(data, pl.LazyFrame) + elif is_narwhals(data): + import narwhals as nw + + return isinstance(data, nw.LazyFrame) return False diff --git a/pixi.toml b/pixi.toml index 09838cd59..c4d731357 100644 --- a/pixi.toml +++ b/pixi.toml @@ -55,13 +55,17 @@ pip = "*" # Required bokeh = ">=3.1" colorcet = ">=2" -holoviews = ">=1.19.0" +# holoviews = ">=1.19.0" # NOTE: REVERT numpy = ">=1.21" packaging = "*" pandas = ">=1.3" -panel = ">=1.0" +# panel = ">=1.0" param = ">=1.12.0,<3.0" +[feature.required.pypi-dependencies] +holoviews = { git = "https://github.com/holoviz/holoviews" , branch = "feat_narwhals"} # NOTE: REVERT +panel = ">=1.8.0rc0" # Until a release + [feature.required.tasks] download-data = 'python scripts/download_data.py' install = 'python -m pip install --no-deps --disable-pip-version-check -e .' @@ -205,11 +209,11 @@ pygraphviz = "*" [feature.test.tasks] test-unit-geo = 'pytest -v hvplot --geo' -test-unit-cov = 'pytest -v hvplot --cov=hvplot --cov-branch --cov-append' +test-unit-cov = 'pytest hvplot --cov=hvplot --cov-branch --cov-append -k polars' # NOTE: REVERT test-unit-geo-cov = 'pytest -v hvplot --geo --cov=hvplot --cov-branch --cov-append' [feature.test-core.tasks] -test-unit = 'pytest -v hvplot' +test-unit = 'pytest hvplot -k polars' # NOTE: REVERT [feature.test-example.tasks] test-example = 'pytest -n logical --dist loadscope --nbval-lax -p no:python'