chore: rudimentary Python benchmarks & support more Polars types (#4442)

danking · web-flow · commit 1710744f4768 · 2025-09-03T00:35:04.000+01:00
I had to patch a hole in Polars type conversion as well.

I do not understand why duckdb is so slow. Maybe Arrow conversion is
very expensive?

Ideally there should be no gap between `test_scan_XXX` and each engine's
XXX benchmark because all of these queries are simple O(N) scans.

```
------------------------------------------------------------------------------------------------ benchmark: 15 tests -------------------------------------------------------------------------------------------------
Name (time in us)                        Min                   Max                  Mean              StdDev                Median                 IQR            Outliers           OPS            Rounds  Iterations
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_repeated_scan_scalar_at          1.0829 (1.0)         12.7910 (1.0)          1.9084 (1.0)        0.5208 (1.0)          1.7919 (1.0)        0.6661 (1.0)      8534;503  523,994.6207 (1.0)       36811           1
test_scan_scalar_at                  32.4170 (29.94)      212.0000 (16.57)       40.4745 (21.21)      4.1199 (7.91)        40.1658 (22.42)      4.1660 (6.25)     1217;209   24,706.9329 (0.05)       7470           1
test_repeated_scan                  122.7499 (113.35)     237.9999 (18.61)      140.3071 (73.52)      5.5270 (10.61)      141.7079 (79.08)      2.9160 (4.38)      445;426    7,127.2225 (0.01)       2293           1
test_scan                           148.0419 (136.71)     555.2911 (43.41)      172.1939 (90.23)     30.2790 (58.14)      166.6665 (93.01)      7.2501 (10.88)       22;81    5,807.4083 (0.01)        810           1
test_polars_scalar_at               161.6669 (149.29)     451.8752 (35.33)      216.6078 (113.50)    23.6843 (45.48)      214.8750 (119.92)    24.1671 (36.28)     811;107    4,616.6396 (0.01)       3865           1
test_duckdb_scalar_at               183.4580 (169.41)   1,157.4170 (90.49)      266.8048 (139.80)    56.7218 (108.92)     258.9581 (144.52)    64.0000 (96.08)     556;120    3,748.0582 (0.01)       2596           1
test_polars                         188.6250 (174.19)     497.0001 (38.86)      252.7230 (132.43)    43.1075 (82.78)      242.3751 (135.26)    33.5106 (50.31)     193;100    3,956.9012 (0.01)       1277           1
test_polars_streaming_scalar_at     213.6659 (197.31)     822.5001 (64.30)      282.8750 (148.22)    38.8552 (74.61)      275.7079 (153.87)    25.8966 (38.88)     366;230    3,535.1300 (0.01)       3096           1
test_scan_filter                    360.1250 (332.56)     527.9169 (41.27)      408.9275 (214.28)    26.6184 (51.11)      403.7919 (225.35)    28.4789 (42.75)      263;68    2,445.4213 (0.00)       1109           1
test_repeated_scan_filter           378.2089 (349.26)     475.5841 (37.18)      410.9510 (215.34)    16.5570 (31.79)      412.9999 (230.49)    10.5419 (15.83)     484;450    2,433.3800 (0.00)       1709           1
test_polars_filter                  397.4999 (367.07)     957.6660 (74.87)      479.0831 (251.04)    43.1060 (82.77)      468.7500 (261.60)    40.5819 (60.92)      182;40    2,087.3206 (0.00)       1006           1
test_polars_streaming               435.4171 (402.09)   1,088.7499 (85.12)      556.2495 (291.47)    51.5738 (99.03)      548.3751 (306.04)    38.2287 (57.39)       79;43    1,797.7544 (0.00)        571           1
test_polars_streaming_filter        621.7081 (574.12)   1,215.5001 (95.03)      712.6453 (373.42)    55.5044 (106.58)     699.2079 (390.21)    68.1449 (102.30)     217;14    1,403.2226 (0.00)       1001           1
test_duckdb_filter                  909.5829 (839.95)   2,197.5001 (171.80)   1,169.5916 (612.86)   148.8653 (285.86)   1,147.1671 (640.21)   220.3751 (330.83)      181;2      854.9993 (0.00)        563           1
test_duckdb                         920.4999 (850.04)   1,703.5829 (133.19)   1,262.7979 (661.70)   182.6230 (350.68)   1,228.4580 (685.58)   335.4165 (503.53)      266;0      791.8923 (0.00)        644           1
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
```

Signed-off-by: Daniel King <dan@spiraldb.com>
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -80,6 +80,11 @@ jobs:
           uv run --all-packages pytest --benchmark-disable test/
         working-directory: vortex-python/
 
+      - name: Pytest Benchmarks - Vortex
+        run: |
+          uv run --all-packages pytest --benchmark-only benchmark/
+        working-directory: vortex-python/
+
       - name: Doctest - PyVortex
         run: |
           uv run --all-packages make doctest
diff --git a/pyproject.toml b/pyproject.toml
@@ -33,6 +33,7 @@ dev-dependencies = [
     "pytest>=7.4.0",
     "ruff>=0.7.1",
     "ray>=2.48",
+    "pytest-benchmark>=5.1.0",
 ]
 
 [tool.uv.workspace]
diff --git a/uv.lock b/uv.lock
diff --git a/vortex-python/benchmark/__init__.py b/vortex-python/benchmark/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
diff --git a/vortex-python/benchmark/conftest.py b/vortex-python/benchmark/conftest.py
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+import hashlib
+import math
+import os
+
+import pyarrow as pa
+import pytest
+
+import vortex as vx
+
+
+@pytest.fixture(
+    scope="session",
+    params=[{"x"}, {"x", "y"}, {"x", "z"}, {"x", "y", "z"}],
+    ids=["int", "int_str", "int_float", "int_str_float"],
+)
+def vxf(tmp_path_factory: pytest.TempPathFactory, request: pytest.FixtureRequest) -> vx.VortexFile:
+    """Write a 100,000-row Vortex file with the requested columns and open it.
+
+    ``request.param`` is the set of column names to generate:
+
+    - ``x`` (always required): the integers ``0 .. length - 1``.
+    - ``y``: the MD5 hex digest of each ``x`` encoded as 4 big-endian bytes.
+    - ``z``: the square root of each ``x``.
+    """
+    # mktemp() hands back a new, uniquely numbered directory on every call, so the
+    # file can never exist yet; it is written unconditionally.
+    fname = tmp_path_factory.mktemp("data") / "foo.vortex"
+    length = 100_000
+
+    columns: dict[str, list[int] | list[float] | list[str]] = {}
+    assert "x" in request.param  # pyright: ignore[reportAny]
+    columns["x"] = list(range(length))
+
+    if "y" in request.param:  # pyright: ignore[reportAny]
+        columns["y"] = [
+            # byteorder is spelled out because it only has a default ("big") on Python 3.11+.
+            hashlib.md5(x.to_bytes(length=4, byteorder="big"), usedforsecurity=False).hexdigest()
+            for x in range(length)
+        ]
+    if "z" in request.param:  # pyright: ignore[reportAny]
+        columns["z"] = [math.sqrt(x) for x in range(length)]
+
+    a = vx.array(pa.table(columns))  # pyright: ignore[reportCallIssue, reportUnknownArgumentType, reportArgumentType]
+    path = os.fspath(fname)
+    vx.io.write(a, path)
+    return vx.open(path)
diff --git a/vortex-python/benchmark/test_aggregation.py b/vortex-python/benchmark/test_aggregation.py
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+from typing import Literal
+
+import duckdb
+import pyarrow as pa
+import pytest
+from pyarrow.types import is_floating, is_integer
+from pytest_benchmark.fixture import BenchmarkFixture  # pyright: ignore[reportMissingTypeStubs]
+
+import vortex as vx
+
+
+def _has_mean(t: pa.DataType) -> bool:
+    """Return whether ``t`` is an Arrow integer or floating-point type."""
+    return any(check(t) for check in (is_integer, is_floating))
+
+
+@pytest.mark.benchmark(group="aggregation", disable_gc=True)
+def test_arrow_table_aggregation(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark scanning the file into one Arrow table and taking column means.
+
+    Only the fields accepted by ``_has_mean`` are aggregated.
+    """
+    aggregations: list[tuple[str, Literal["mean"]]] = [
+        (field.name, "mean")
+        for field in vxf.dtype.to_arrow_schema()  # pyright: ignore[reportUnknownVariableType]
+        if _has_mean(field.type)  # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType]
+    ]
+
+    def run():
+        # The scan and the concatenation are part of the timed work.
+        table = pa.concat_tables(batch.to_arrow_table() for batch in vxf.scan())
+        return table.group_by([]).aggregate(aggregations)
+
+    benchmark(run)
+
+
+@pytest.mark.benchmark(group="aggregation", disable_gc=True)
+def test_polars_aggregation(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark ``mean()`` over the Polars view of the file, converted to Arrow."""
+    frame = vxf.to_polars()
+
+    def run():
+        return frame.mean().collect().to_arrow()
+
+    benchmark(run)
+
+
+@pytest.mark.benchmark(group="aggregation", disable_gc=True)
+def test_polars_streaming_aggregation(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark ``mean()`` over the Polars view, collected with ``engine="streaming"``."""
+    frame = vxf.to_polars()
+
+    def run():
+        return frame.mean().collect(engine="streaming").to_arrow()
+
+    benchmark(run)
+
+
+@pytest.mark.benchmark(group="aggregation", disable_gc=True)
+def test_duckdb_aggregation(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark DuckDB computing ``avg`` of every column accepted by ``_has_mean``.
+
+    The file is registered with an in-memory DuckDB connection as ``ds``; only
+    running the query and converting its result to an Arrow table is timed.
+    """
+    conn = duckdb.connect(database=":memory:")  # pyright: ignore[reportUnknownMemberType]
+    ds = vxf.to_dataset()
+    _ = conn.register("ds", ds)
+    aggregations = ",".join(
+        [f"avg(ds.{field.name}) as {field.name}" for field in vxf.dtype.to_arrow_schema() if _has_mean(field.type)]  # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType, reportUnknownArgumentType]
+    )
+    query = f"select {aggregations} from ds"
+    benchmark(lambda: conn.sql(query).to_arrow_table())
diff --git a/vortex-python/benchmark/test_filter.py b/vortex-python/benchmark/test_filter.py
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+import duckdb
+import polars as pl
+import pyarrow as pa
+import pytest
+from pytest_benchmark.fixture import BenchmarkFixture  # pyright: ignore[reportMissingTypeStubs]
+
+import vortex as vx
+from vortex.expr import column
+
+
+@pytest.mark.benchmark(group="filter", disable_gc=True)
+def test_scan_filter(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark a scan filtered by ``x >= 50_000``, concatenated into one Arrow table."""
+
+    def run():
+        # The filter expression is rebuilt on every round, so it is part of the timing.
+        batches = vxf.scan(expr=column("x") >= 50_000)
+        return pa.concat_tables(batch.to_arrow_table() for batch in batches)
+
+    benchmark(run)
+
+
+@pytest.mark.benchmark(group="filter", disable_gc=True)
+def test_repeated_scan_filter(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark executing a repeated scan filtered by ``x >= 50_000``.
+
+    The repeated scan is built once outside the timed section; each round only
+    executes it and concatenates the results into one Arrow table.
+    """
+    # Use ``>=`` like every other benchmark in the "filter" group so that all
+    # engines select the same set of rows.
+    rscan = vxf.to_repeated_scan(expr=column("x") >= 50_000)
+    benchmark(lambda: pa.concat_tables(x.to_arrow_table() for x in rscan.execute()))
+
+
+@pytest.mark.benchmark(group="filter", disable_gc=True)
+def test_polars_filter(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark the Polars view filtered by ``x >= 50_000`` (literal cast to Int64)."""
+    frame = vxf.to_polars()
+
+    def run():
+        # The predicate is rebuilt on every round, so it is part of the timing.
+        predicate = pl.col("x") >= pl.lit(50_000).cast(pl.Int64)
+        return frame.filter(predicate).collect().to_arrow()
+
+    benchmark(run)
+
+
+@pytest.mark.benchmark(group="filter", disable_gc=True)
+def test_polars_streaming_filter(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark the ``x >= 50_000`` Polars filter collected with ``engine="streaming"``."""
+    frame = vxf.to_polars()
+
+    def run():
+        # The predicate is rebuilt on every round, so it is part of the timing.
+        predicate = pl.col("x") >= pl.lit(50_000).cast(pl.Int64)
+        return frame.filter(predicate).collect(engine="streaming").to_arrow()
+
+    benchmark(run)
+
+
+@pytest.mark.benchmark(group="filter", disable_gc=True)
+def test_duckdb_filter(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark DuckDB filtering the file by ``x >= 50000``.
+
+    The query projects only column ``x``; its result is converted to an Arrow table.
+    """
+    dataset = vxf.to_dataset()
+    con = duckdb.connect(database=":memory:")  # pyright: ignore[reportUnknownMemberType]
+    _ = con.register("ds", dataset)
+    sql = "select ds.x from ds where x >= 50000"
+
+    def run():
+        return con.sql(sql).to_arrow_table()
+
+    benchmark(run)
diff --git a/vortex-python/benchmark/test_scalar_at.py b/vortex-python/benchmark/test_scalar_at.py
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+import duckdb
+import pyarrow as pa
+import pytest
+from pytest_benchmark.fixture import BenchmarkFixture  # pyright: ignore[reportMissingTypeStubs]
+
+import vortex as vx
+
+
+@pytest.mark.benchmark(group="scalar_at", disable_gc=True)
+def test_scan_scalar_at(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark a scan restricted to the single index 50,000, as one Arrow table."""
+
+    def run():
+        # The index array is rebuilt on every round, so it is part of the timing.
+        batches = vxf.scan(indices=vx.array([50_000]))
+        return pa.concat_tables(batch.to_arrow_table() for batch in batches)
+
+    benchmark(run)
+
+
+@pytest.mark.benchmark(group="scalar_at", disable_gc=True)
+def test_repeated_scan_scalar_at(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark ``scalar_at(50_000)`` on a repeated scan built once up front."""
+    repeated = vxf.to_repeated_scan()
+
+    def run():
+        return repeated.scalar_at(50_000)
+
+    benchmark(run)
+
+
+@pytest.mark.benchmark(group="scalar_at", disable_gc=True)
+def test_polars_scalar_at(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark fetching the single row at offset 50,000 through Polars."""
+    lf = vxf.to_polars()
+    # Polars' slice takes (offset, length), not (start, stop): a length of 1 selects
+    # exactly one row, matching the other benchmarks in the "scalar_at" group.
+    benchmark(lambda: lf.slice(50_000, 1).collect().to_arrow())
+
+
+@pytest.mark.benchmark(group="scalar_at", disable_gc=True)
+def test_polars_streaming_scalar_at(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark fetching the single row at offset 50,000 with ``engine="streaming"``."""
+    lf = vxf.to_polars()
+    # Polars' slice takes (offset, length), not (start, stop): a length of 1 selects
+    # exactly one row, matching the other benchmarks in the "scalar_at" group.
+    benchmark(lambda: lf.slice(50_000, 1).collect(engine="streaming").to_arrow())
+
+
+@pytest.mark.benchmark(group="scalar_at", disable_gc=True)
+def test_duckdb_scalar_at(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark DuckDB fetching one row of column ``x`` at offset 50000.
+
+    The file is registered as ``ds`` on an in-memory connection; the result is
+    converted to an Arrow table.
+    """
+    dataset = vxf.to_dataset()
+    con = duckdb.connect(database=":memory:")  # pyright: ignore[reportUnknownMemberType]
+    _ = con.register("ds", dataset)
+    sql = "select ds.x from ds offset 50000 limit 1"
+
+    def run():
+        return con.sql(sql).to_arrow_table()
+
+    benchmark(run)
diff --git a/vortex-python/benchmark/test_scan.py b/vortex-python/benchmark/test_scan.py
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+import duckdb
+import pyarrow as pa
+import pytest
+from pytest_benchmark.fixture import BenchmarkFixture  # pyright: ignore[reportMissingTypeStubs]
+
+import vortex as vx
+
+
+@pytest.mark.benchmark(group="scan", disable_gc=True)
+def test_scan(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark an unfiltered scan of the file, concatenated into one Arrow table."""
+
+    def run():
+        return pa.concat_tables(batch.to_arrow_table() for batch in vxf.scan())
+
+    benchmark(run)
+
+
+@pytest.mark.benchmark(group="scan", disable_gc=True)
+def test_repeated_scan(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark executing an unfiltered repeated scan built once up front."""
+    repeated = vxf.to_repeated_scan()
+
+    def run():
+        return pa.concat_tables(batch.to_arrow_table() for batch in repeated.execute())
+
+    benchmark(run)
+
+
+@pytest.mark.benchmark(group="scan", disable_gc=True)
+def test_polars(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark collecting the Polars view of the file and converting it to Arrow."""
+    frame = vxf.to_polars()
+
+    def run():
+        return frame.collect().to_arrow()
+
+    benchmark(run)
+
+
+@pytest.mark.benchmark(group="scan", disable_gc=True)
+def test_polars_streaming(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark collecting the Polars view with ``engine="streaming"``, then Arrow."""
+    frame = vxf.to_polars()
+
+    def run():
+        return frame.collect(engine="streaming").to_arrow()
+
+    benchmark(run)
+
+
+@pytest.mark.benchmark(group="scan", disable_gc=True)
+def test_duckdb(benchmark: BenchmarkFixture, vxf: vx.VortexFile):
+    """Benchmark DuckDB selecting column ``x`` from the file, converted to Arrow.
+
+    The file is registered as ``ds`` on an in-memory connection; the query reads
+    only ``x`` even when the file holds more columns.
+    """
+    dataset = vxf.to_dataset()
+    con = duckdb.connect(database=":memory:")  # pyright: ignore[reportUnknownMemberType]
+    _ = con.register("ds", dataset)
+    sql = "select ds.x from ds"
+
+    def run():
+        return con.sql(sql).to_arrow_table()
+
+    benchmark(run)
diff --git a/vortex-python/pyproject.toml b/vortex-python/pyproject.toml
@@ -72,4 +72,5 @@ dev = [
     "pandas-stubs>=2.2.3.241126",
     "pcodec>=0.3.3",
     "pyarrow-stubs>=17.16",
+    "pytest-benchmark>=5.1.0",
 ]
diff --git a/vortex-python/python/vortex/polars_.py b/vortex-python/python/vortex/polars_.py
@@ -84,8 +84,20 @@ def _polars_to_vortex(expr: dict[str, Any]) -> ve.Expr:  # pyright: ignore[repor
         elif "Float" in scalar:
             value = scalar["Float"]  # pyright: ignore[reportAny]
             dtype = "Float64"
+        elif "Float32" in scalar:
+            value = scalar["Float32"]  # pyright: ignore[reportAny]
+            dtype = "Float32"
+        elif "Float64" in scalar:
+            value = scalar["Float64"]  # pyright: ignore[reportAny]
+            dtype = "Float64"
+        elif "Int32" in scalar:
+            value = scalar["Int32"]  # pyright: ignore[reportAny]
+            dtype = "Int32"
+        elif "Int64" in scalar:
+            value = scalar["Int64"]  # pyright: ignore[reportAny]
+            dtype = "Int64"
         else:
-            raise ValueError(f"Unsupported Polars scalar value type {scalar}")
+            raise ValueError(f"Cannot convert to Vortex: unsupported Polars scalar value type {scalar}")
 
         return ve.literal(_LITERAL_TYPES[dtype](value), value)
 

Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,7 @@ dev-dependencies = [`
`33`	`33`	`"pytest>=7.4.0",`
`34`	`34`	`"ruff>=0.7.1",`
`35`	`35`	`"ray>=2.48",`
	`36`	`+ "pytest-benchmark>=5.1.0",`
`36`	`37`	`]`
`37`	`38`
`38`	`39`	`[tool.uv.workspace]`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+# SPDX-License-Identifier: Apache-2.0`
	`2`	`+# SPDX-FileCopyrightText: Copyright the Vortex contributors`
Original file line number	Diff line number	Diff line change
`@@ -72,4 +72,5 @@ dev = [`
`72`	`72`	`"pandas-stubs>=2.2.3.241126",`
`73`	`73`	`"pcodec>=0.3.3",`
`74`	`74`	`"pyarrow-stubs>=17.16",`
	`75`	`+ "pytest-benchmark>=5.1.0",`
`75`	`76`	`]`