perf: Port FrozenOrderedSet to rust by tobni · Pull Request #23200 · pantsbuild/pants

tobni · 2026-03-29T13:23:48Z

Follow-up to #22501. Same approach: FrozenOrderedSet is now a pyo3 #[pyclass] backed by a Py<PyDict>, with its hash computed lazily via OnceLock. The end goal is to port more rule code to Rust intrinsics.

"""Benchmark: Rust FrozenOrderedSet vs Python FrozenOrderedSet."""

import sys
import timeit
from collections.abc import Hashable, Iterable, Iterator
from typing import AbstractSet, Any, TypeVar

sys.path.insert(0, "src/python")

from pants.engine.internals.native_engine import FrozenOrderedSet as RustFrozenOrderedSet

T = TypeVar("T")


class PyFrozenOrderedSet(AbstractSet[T], Hashable):
    """The old pure-Python FrozenOrderedSet (pre-port).

    Kept in this script as the timing baseline, so its implementation is
    intentionally left exactly as it was. Items are stored as the keys of a
    plain ``dict``, which de-duplicates them and keeps first-insertion order.
    """

    def __init__(self, iterable=None):
        """Build the set from ``iterable``; ``None`` or any falsy value yields an empty set."""
        # dict.fromkeys keeps one key per distinct item, in first-seen order.
        self._items = dict.fromkeys(iterable) if iterable else {}
        # Cached hash value; stays None until __hash__ is first called.
        self._hash = None

    def __len__(self) -> int:
        """Return the number of distinct items."""
        return len(self._items)

    def __contains__(self, key) -> bool:
        """Return whether ``key`` is one of the stored items (dict key lookup)."""
        return key in self._items

    def __iter__(self) -> Iterator:
        """Iterate over the items in insertion order."""
        return iter(self._items)

    def __reversed__(self):
        """Iterate over the items in reverse insertion order."""
        # The keys are copied into a tuple first, then reversed.
        return reversed(tuple(self._items.keys()))

    def __eq__(self, other: Any):
        """Order-sensitive equality against another instance of this class.

        Returns ``NotImplemented`` when ``other`` is not an instance of
        ``self.__class__``; otherwise the sets are equal only if they have the
        same length and their items compare equal pairwise in iteration order.
        """
        if not isinstance(other, self.__class__):
            return NotImplemented
        # The length check comes first so zip() cannot hide a longer operand.
        return len(self._items) == len(other._items) and all(
            x == y for x, y in zip(self._items, other._items)
        )

    def __hash__(self) -> int:
        """Return the XOR of the item hashes, computed once and then cached."""
        if self._hash is None:
            self._hash = 0
            # XOR is commutative, so the result does not depend on item order.
            for item in self._items.keys():
                self._hash ^= hash(item)
        return self._hash

    def __repr__(self) -> str:
        """Return ``PyFrozenOrderedSet([...])`` listing the items in order."""
        return f"PyFrozenOrderedSet({list(self)!r})"

    def __bool__(self) -> bool:
        """Return True when the set holds at least one item."""
        return bool(self._items)

    def union(self, other):
        """Return a new set: this set's items, then the items of ``other`` not already present."""
        return self.__class__(list(self) + [x for x in other if x not in self._items])

    def intersection(self, other):
        """Return a new set of this set's items that also occur in ``other``, in this set's order."""
        # ``other`` is copied into a builtin set for the membership tests.
        s = set(other)
        return self.__class__(x for x in self if x in s)

    def difference(self, other):
        """Return a new set of this set's items that do not occur in ``other``, in this set's order."""
        # ``other`` is copied into a builtin set for the membership tests.
        s = set(other)
        return self.__class__(x for x in self if x not in s)

    def issubset(self, other) -> bool:
        """Return whether every item of this set is contained in ``other``."""
        # A set with more items than ``other`` cannot be a subset of it.
        if len(self) > len(other):
            return False
        return all(item in other for item in self)


# Number of untimed executions run before each measurement.
WARMUP = 1000


def measure(stmt, number, globs, *, repeat=3):
    """Return the best observed cost of one execution of ``stmt``, in microseconds.

    The statement is first executed ``WARMUP`` times without being timed, then
    timed ``repeat`` times for ``number`` executions each. The fastest of those
    runs is reported, because slower runs mostly reflect interference from
    other processes rather than the code under test.

    Args:
        stmt: Source of the statement to time, as accepted by ``timeit``.
        number: Executions per timed run; must be positive.
        globs: Namespace in which ``stmt`` is executed.
        repeat: How many timed runs to take the minimum of; must be positive.

    Returns:
        Microseconds per single execution of ``stmt`` in the fastest run.

    Raises:
        ValueError: If ``number`` or ``repeat`` is not positive.
    """
    if number <= 0:
        raise ValueError(f"number must be positive, got {number!r}")
    if repeat <= 0:
        raise ValueError(f"repeat must be positive, got {repeat!r}")

    # Untimed warm-up so one-off costs (lazy caches, first-call setup) are not measured.
    timeit.timeit(stmt, number=WARMUP, globals=globs)
    best = min(timeit.repeat(stmt, repeat=repeat, number=number, globals=globs))
    return best / number * 1_000_000


# Each factory below receives the per-dataset context as keyword arguments and
# returns ``[python_globals, rust_globals]``: the two namespaces in which the
# benchmark statement is timed. Context entries a factory does not need are
# swallowed by ``**_``.


def _construction_globs(data, py, rs, **_):
    """Namespaces for constructing a set of each class from the raw data."""
    return [
        {"Cls": PyFrozenOrderedSet, "data": data},
        {"Cls": RustFrozenOrderedSet, "data": data},
    ]


def _fd_globs(py, rs, **_):
    """Namespaces exposing only the set under test as ``fd``."""
    return [
        {"fd": py},
        {"fd": rs},
    ]


def _hit_globs(py, rs, mid, **_):
    """Namespaces for a membership test with a key taken from the data."""
    return [
        {"fd": py, "k": mid},
        {"fd": rs, "k": mid},
    ]


def _miss_globs(py, rs, **_):
    """Namespaces for a membership test with the key ``"MISSING"``."""
    return [
        {"fd": py, "k": "MISSING"},
        {"fd": rs, "k": "MISSING"},
    ]


def _eq_globs(py, rs, py2, rs2, **_):
    """Namespaces pairing each set with a second set of the same class."""
    return [
        {"fd": py, "fd2": py2},
        {"fd": rs, "fd2": rs2},
    ]


def _other_globs(py, rs, py_other, rs_other, **_):
    """Namespaces for binary set operations against ``other``."""
    return [
        {"fd": py, "other": py_other},
        {"fd": rs, "other": rs_other},
    ]


def _issubset_globs(py, rs, py_small, rs_small, **_):
    """Namespaces for testing whether ``small`` is a subset of ``fd``."""
    return [
        {"small": py_small, "fd": py},
        {"small": rs_small, "fd": rs},
    ]


def _dict_key_globs(py, rs, **_):
    """Namespaces with a one-entry dict keyed by the set itself."""
    return [
        {"fd": py, "d": {py: 1}},
        {"fd": rs, "d": {rs: 1}},
    ]


# (label, statement to time, factory for the two timing namespaces)
BENCHMARKS = [
    ("Construction", "Cls(data)", _construction_globs),
    ("hash()", "hash(fd)", _fd_globs),
    ("__contains__", "k in fd", _hit_globs),
    ("__contains__ miss", "k in fd", _miss_globs),
    ("__eq__", "fd == fd2", _eq_globs),
    ("iteration", "list(fd)", _fd_globs),
    ("union", "fd.union(other)", _other_globs),
    ("intersection", "fd.intersection(other)", _other_globs),
    ("difference", "fd.difference(other)", _other_globs),
    ("issubset", "small.issubset(fd)", _issubset_globs),
    ("dict key", "d[fd]", _dict_key_globs),
]

# Input sizes exercised by every benchmark.
SMALL = list(range(5))
MEDIUM = list(range(20))
LARGE = list(range(200))

DATASETS = [("small (5)", SMALL), ("medium (20)", MEDIUM), ("large (200)", LARGE)]

# benchmark label -> dataset label -> (python µs, rust µs) per execution.
all_results: dict[str, dict[str, tuple[float, float]]] = {}

for ds_name, data in DATASETS:
    print(f"\n{'=' * 60}")
    print(f"  Dataset: {ds_name}")
    print(f"{'=' * 60}")

    py = PyFrozenOrderedSet(data)
    rs = RustFrozenOrderedSet(data)
    # Second, separately built instances so ``__eq__`` compares distinct objects.
    py2 = PyFrozenOrderedSet(data)
    rs2 = RustFrozenOrderedSet(data)
    # "other" shares the first half of the data and adds as many items again
    # from 1000 upwards, which do not occur in any dataset.
    half = data[:len(data) // 2]
    other_items = half + list(range(1000, 1000 + len(half)))
    py_other = PyFrozenOrderedSet(other_items)
    rs_other = RustFrozenOrderedSet(other_items)
    py_small = PyFrozenOrderedSet(data[:3])
    rs_small = RustFrozenOrderedSet(data[:3])
    # Warm up lazy hashes on both sides. The Rust class is described as
    # computing its hash lazily too, so it gets the same treatment as the
    # Python baseline.
    for obj in (py, py2, py_other, py_small, rs, rs2, rs_other, rs_small):
        hash(obj)
    # Fewer iterations for the large dataset to keep the total runtime bounded.
    n = 500_000 if len(data) <= 20 else 50_000
    # A key that is present, used by the membership-hit benchmark.
    mid = data[len(data) // 2]

    ctx = dict(data=data, py=py, rs=rs, py2=py2, rs2=rs2, mid=mid,
               py_other=py_other, rs_other=rs_other, py_small=py_small, rs_small=rs_small)

    for bench_name, stmt, make_globs in BENCHMARKS:
        py_globs, rs_globs = make_globs(**ctx)
        py_us = measure(stmt, n, py_globs)
        rs_us = measure(stmt, n, rs_globs)
        print(f"  {bench_name:.<20s} Python {py_us:8.3f} µs  Rust {rs_us:8.3f} µs  ({py_us / rs_us:.1f}x)")
        all_results.setdefault(bench_name, {})[ds_name] = (py_us, rs_us)

# Summary table: one row per benchmark, one column per dataset, each cell
# showing how many times faster the Rust class was than the Python baseline.
ds_names = [entry[0] for entry in DATASETS]

# Build the header and the separator line column by column.
header_cells = [f"  {'Operation':<20s}"]
sep_cells = [f"  {'-'*20}"]
for name in ds_names:
    header_cells.append(f" | {name:>12s}")
    sep_cells.append(f"-+-{'-'*12}")
header = "".join(header_cells)
sep = "".join(sep_cells)

print(f"\n{'=' * 60}")
print("  Summary (Python / Rust speedup)")
print(f"{'=' * 60}")
print(header)
print(sep)
for bench_name, _stmt, _make_globs in BENCHMARKS:
    cells = [f"  {bench_name:<20s}"]
    for ds_name in ds_names:
        py_us, rs_us = all_results[bench_name][ds_name]
        ratio = py_us / rs_us
        cells.append(f" | {ratio:11.1f}x")
    row = "".join(cells)
    print(row)

  Operation            |    small (5) |  medium (20) |  large (200)
  ---------------------+--------------+--------------+-------------
  Construction         |         1.2x |         1.1x |         1.0x
  hash()               |         2.2x |         2.2x |         2.2x
  __contains__         |         1.8x |         1.7x |         1.7x
  __contains__ miss    |         1.8x |         1.8x |         1.8x
  __eq__               |         3.6x |         2.0x |         1.4x
  iteration            |         1.5x |         1.4x |         1.1x
  union                |         4.1x |         3.0x |         2.5x
  intersection         |         2.7x |         1.7x |         1.2x
  difference           |         3.0x |         1.8x |         1.3x
  issubset             |         7.1x |         6.8x |         7.0x
  dict key             |         2.1x |         2.0x |         2.0x

cburroughs · 2026-03-30T15:47:17Z

Really wish I knew rust better for all these cool performance cases. Cross referencing: #14719

perf: Port frozendict to rust

35c0b24

tobni force-pushed the add/port-frozen-ordered-set branch from 42a564e to ca376c9 Compare March 29, 2026 13:25

tobni added category:internal CI, fixes for not-yet-released features, etc. release-notes:not-required [CI] PR doesn't require mention in release notes labels Mar 29, 2026

tobni force-pushed the add/port-frozen-ordered-set branch 6 times, most recently from 027bd5b to ee17c67 Compare March 29, 2026 17:59

perf: Port FrozenOrderedSet to rust

0bf4cba

tobni force-pushed the add/port-frozen-ordered-set branch from ee17c67 to 0bf4cba Compare March 29, 2026 18:45

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

perf: Port FrozenOrderedSet to rust#23200

perf: Port FrozenOrderedSet to rust#23200
tobni wants to merge 2 commits intopantsbuild:mainfrom
tobni:add/port-frozen-ordered-set

tobni commented Mar 29, 2026 •

edited

Loading

Uh oh!

cburroughs commented Mar 30, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

Uh oh!

Conversation

tobni commented Mar 29, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

cburroughs commented Mar 30, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

tobni commented Mar 29, 2026 •

edited

Loading