
Commit b7abaf9

ci: Make it a bit greener (#3241)
* ci: Make it a bit greener
* skip scan parquet sqlframe+windows case
* change get_categories from elementwise to filtration, change fixture name
* more pinning
1 parent 0989acc commit b7abaf9

File tree

11 files changed: +56 -31 lines


.github/workflows/downstream_tests.yml

Lines changed: 2 additions & 0 deletions
@@ -187,6 +187,8 @@ jobs:
        run: |
          cd py-shiny
          . .venv/bin/activate
+         # temporary pin to get CI green
+         uv pip install "flake8-bugbear<25.10.21"
          make narwhals-install-shiny
      - name: install-narwhals-dev
        run: |

.github/workflows/pytest-pyspark.yml

Lines changed: 2 additions & 1 deletion
@@ -74,7 +74,8 @@ jobs:
          java-version: 17

      - name: install-reqs
-       run: uv pip install -e . --group core-tests --group extra --system
+       # temporary pin to get CI green
+       run: uv pip install -e . --group core-tests --group extra "pyarrow<22.0" --system
      - name: install pyspark
        run: echo "setuptools<78" | uv pip install -e . "pyspark[connect]==${SPARK_VERSION}" --system
      - name: show-deps

.github/workflows/pytest.yml

Lines changed: 2 additions & 10 deletions
@@ -141,13 +141,9 @@ jobs:
          enable-cache: "true"
          cache-suffix: python-314-${{ matrix.python-version }}
          cache-dependency-glob: "pyproject.toml"
-     - name: install pyarrow nightly
-       run: |
-         uv pip uninstall pyarrow --system
-         uv pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pyarrow -U --system
      - name: install-reqs
        # Use `--pre` as duckdb stable not compatible with 3.14
-       run: uv pip install -e . --group tests --pre pandas polars duckdb sqlframe --system
+       run: uv pip install -e . --group tests --pre pandas polars pyarrow duckdb sqlframe --system
      - name: show-deps
        run: uv pip freeze
      - name: Run pytest
@@ -172,12 +168,8 @@
          enable-cache: "true"
          cache-suffix: python-314t-${{ matrix.python-version }}
          cache-dependency-glob: "pyproject.toml"
-     - name: install pyarrow nightly
-       run: |
-         uv pip uninstall pyarrow --system
-         uv pip install -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pyarrow -U --system
      - name: install-reqs
-       run: uv pip install -e . --group tests --pre pandas --system
+       run: uv pip install -e . --group tests --pre pandas pyarrow --system
      - name: show-deps
        run: uv pip freeze
      - name: Run pytest

narwhals/expr_cat.py

Lines changed: 1 addition & 1 deletion
@@ -34,6 +34,6 @@ def get_categories(self) -> ExprT:
        │ mango │
        └────────┘
        """
-       return self._expr._with_elementwise(
+       return self._expr._with_filtration(
            lambda plx: self._expr._to_compliant_expr(plx).cat.get_categories()
        )
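For context on the switch from `_with_elementwise` to `_with_filtration`: `get_categories` returns one row per distinct category, so its output length generally differs from the input length. Below is a minimal sketch of the user-facing effect, assuming a Polars-backed frame and mirroring the new lazy test added further down; it is an illustration, not part of the commit.

import polars as pl
import narwhals as nw
from narwhals.exceptions import InvalidOperationError

# Hypothetical example data, matching the test fixture in this commit.
lf = nw.from_native(pl.DataFrame({"a": ["one", "two", "two"]})).lazy()
expr = nw.col("a").cast(nw.Categorical).cat.get_categories()

# A filtration (length-changing) expression is rejected in a LazyFrame select...
try:
    lf.select(expr).collect()
except InvalidOperationError as exc:
    print(exc)  # "Length-changing expressions are not supported for use in LazyFrame"

# ...but is accepted once reduced to length 1, e.g. via an aggregation.
print(lf.select(expr.min()).collect())  # column "a" containing ["one"]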

tests/expr_and_series/cat/get_categories_test.py

Lines changed: 17 additions & 1 deletion
@@ -3,12 +3,13 @@
import pytest

import narwhals as nw
+from narwhals.exceptions import InvalidOperationError
from tests.utils import PYARROW_VERSION, ConstructorEager, assert_equal_data

data = {"a": ["one", "two", "two"]}


-def test_get_categories(constructor_eager: ConstructorEager) -> None:
+def test_get_categories_eager(constructor_eager: ConstructorEager) -> None:
    if "pyarrow_table" in str(constructor_eager) and PYARROW_VERSION < (15, 0, 0):
        pytest.skip()

@@ -23,6 +24,21 @@ def test_get_categories(constructor_eager: ConstructorEager) -> None:
    assert_equal_data({"a": result_series}, expected)


+def test_get_categories_lazy(constructor_eager: ConstructorEager) -> None:
+    if "pyarrow_table" in str(constructor_eager) and PYARROW_VERSION < (15, 0, 0):
+        pytest.skip()
+
+    df = nw.from_native(constructor_eager(data)).lazy()
+    expr = nw.col("a").cast(nw.Categorical).cat.get_categories()
+    msg = "Length-changing expressions are not supported for use in LazyFrame"
+    with pytest.raises(InvalidOperationError, match=msg):
+        df.select(expr).collect()
+
+    result = df.select(expr.min())
+    expected = {"a": ["one"]}
+    assert_equal_data(result, expected)
+
+
def test_get_categories_pyarrow() -> None:
    pytest.importorskip("pyarrow")
    import pyarrow as pa

tests/expr_and_series/cum_min_test.py

Lines changed: 2 additions & 1 deletion
@@ -7,6 +7,7 @@
    DUCKDB_VERSION,
    PANDAS_VERSION,
    POLARS_VERSION,
+   PYARROW_VERSION,
    Constructor,
    ConstructorEager,
    assert_equal_data,
@@ -97,7 +98,7 @@ def test_lazy_cum_min_ordered_by_nulls(
    if "cudf" in str(constructor):
        # https://github.com/rapidsai/cudf/issues/18159
        request.applymarker(pytest.mark.xfail)
-   if "pyarrow" in str(constructor) and is_windows():
+   if "pyarrow" in str(constructor) and is_windows() and PYARROW_VERSION < (22, 0):
        # https://github.com/pandas-dev/pandas/issues/62477
        request.applymarker(pytest.mark.xfail)

tests/expr_and_series/cum_sum_test.py

Lines changed: 2 additions & 1 deletion
@@ -6,6 +6,7 @@
from tests.utils import (
    DUCKDB_VERSION,
    POLARS_VERSION,
+   PYARROW_VERSION,
    Constructor,
    ConstructorEager,
    assert_equal_data,
@@ -98,7 +99,7 @@ def test_lazy_cum_sum_ordered_by_nulls(
    if "cudf" in str(constructor):
        # https://github.com/rapidsai/cudf/issues/18159
        request.applymarker(pytest.mark.xfail)
-   if "pyarrow" in str(constructor) and is_windows():
+   if "pyarrow" in str(constructor) and is_windows() and PYARROW_VERSION < (22, 0):
        # https://github.com/pandas-dev/pandas/issues/62477
        request.applymarker(pytest.mark.xfail)
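The `PYARROW_VERSION < (22, 0)` guards in these two cumulative tests narrow the existing Windows xfail to older PyArrow builds. A hedged, self-contained sketch of that version-gate pattern follows; `PYARROW_VERSION` is a placeholder for the tuple that `tests.utils` presumably derives from the installed pyarrow version, and `is_windows` stands in for the helper of the same name.

import sys

import pytest

# Placeholder for tests.utils.PYARROW_VERSION (assumed to be a tuple of ints).
PYARROW_VERSION = (21, 0, 0)


def is_windows() -> bool:
    # Stand-in for the tests.utils helper used in the tests above.
    return sys.platform.startswith("win")


def test_example(request: pytest.FixtureRequest) -> None:
    # Only expect failure where the upstream bug applies: on Windows with
    # pyarrow older than 22.0 (see pandas-dev/pandas#62477).
    if is_windows() and PYARROW_VERSION < (22, 0):
        request.applymarker(pytest.mark.xfail)
    assert True  # the real cumulative-sum assertions would go here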

tests/expr_and_series/dt/truncate_test.py

Lines changed: 14 additions & 8 deletions
@@ -7,7 +7,7 @@
import pytest

import narwhals as nw
-from tests.utils import Constructor, ConstructorEager, assert_equal_data
+from tests.utils import POLARS_VERSION, Constructor, ConstructorEager, assert_equal_data

data = {
    "a": [datetime(2021, 3, 1, 12, 34, 56, 49012), datetime(2020, 1, 2, 2, 4, 14, 715123)]
@@ -54,9 +54,13 @@ def test_truncate(
    expected: list[datetime],
) -> None:
    if every.endswith("ns") and any(
-       x in str(constructor) for x in ("polars", "duckdb", "pyspark", "ibis")
+       x in str(constructor) for x in ("duckdb", "pyspark", "ibis")
    ):
        request.applymarker(pytest.mark.xfail())
+
+   if every.endswith("ns") and "polars" in str(constructor) and POLARS_VERSION < (1, 35):
+       request.applymarker(pytest.mark.xfail())
+
    if any(every.endswith(x) for x in ("mo", "q", "y")) and any(
        x in str(constructor) for x in ("dask", "cudf")
    ):
@@ -109,14 +113,16 @@ def test_truncate_multiples(
        # - cudf: https://github.com/rapidsai/cudf/issues/18654
        # - pyspark/sqlframe: Only multiple 1 is currently supported
        request.applymarker(pytest.mark.xfail())
-   if every.endswith("ns") and any(
-       x in str(constructor) for x in ("polars", "duckdb", "ibis")
-   ):
+
+   if every.endswith("ns") and any(x in str(constructor) for x in ("duckdb", "ibis")):
        request.applymarker(pytest.mark.xfail())
-   if any(every.endswith(x) for x in ("mo", "q", "y")) and any(
-       x in str(constructor) for x in ("dask",)
-   ):
+
+   if every.endswith("ns") and "polars" in str(constructor) and POLARS_VERSION < (1, 35):
+       request.applymarker(pytest.mark.xfail())
+
+   if any(every.endswith(x) for x in ("mo", "q", "y")) and "dask" in str(constructor):
        request.applymarker(pytest.mark.xfail(reason="Not implemented"))
+
    df = nw.from_native(constructor(data))
    result = df.select(nw.col("a").dt.truncate(every))
    assert_equal_data(result, {"a": expected})

tests/read_scan_test.py

Lines changed: 5 additions & 0 deletions
@@ -11,6 +11,7 @@
    PANDAS_VERSION,
    Constructor,
    assert_equal_data,
+   is_windows,
    pyspark_session,
    sqlframe_session,
)
@@ -141,6 +142,10 @@ def test_read_parquet_raise_with_lazy(backend: _LazyOnly) -> None:
@skipif_pandas_lt_1_5
def test_scan_parquet(parquet_path: FileSource, constructor: Constructor) -> None:
    kwargs: dict[str, Any]
+   if "sqlframe" in str(constructor) and is_windows():
+       reason = "_duckdb.IOException: IO Error: No files found that match the pattern"
+       pytest.skip(reason)
+
    if "sqlframe" in str(constructor):
        kwargs = {"session": sqlframe_session(), "inferSchema": True}
    elif "pyspark" in str(constructor):

tests/testing/assert_series_equal_test.py

Lines changed: 5 additions & 4 deletions
@@ -31,7 +31,7 @@ def series_from_native(native: IntoSeriesT) -> nw.Series[IntoSeriesT]:


def test_self_equal(
-   constructor_eager: ConstructorEager, data: Data, schema: IntoSchema
+   constructor_eager: ConstructorEager, testing_data: Data, testing_schema: IntoSchema
) -> None:
    """Test that a series is equal to itself, including nested dtypes with nulls."""
    if "pandas" in str(constructor_eager):
@@ -52,9 +52,10 @@ def test_self_equal(

    if "pyarrow_table" in str(constructor_eager):
        # Replace Enum with Categorical, since Pyarrow does not support Enum
-       schema = {**schema, "enum": nw.Categorical()}
-
-   df = nw.from_native(constructor_eager(data), eager_only=True)
+       schema = {**testing_schema, "enum": nw.Categorical()}
+   else:
+       schema = dict(testing_schema)  # make a copy
+   df = nw.from_native(constructor_eager(testing_data), eager_only=True)
    for name, dtype in schema.items():
        assert_series_equal(df[name].cast(dtype), df[name].cast(dtype))
