Skip to content

Commit 027bd5b

Browse files
committed
perf: Port FrozenOrderedSet to rust
1 parent 35c0b24 commit 027bd5b

File tree

10 files changed

+522
-56
lines changed

10 files changed

+522
-56
lines changed

src/python/pants/backend/python/util_rules/interpreter_constraints.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,14 @@ def for_fixed_python_version(
8989
) -> InterpreterConstraints:
9090
return cls([f"{interpreter_type}=={python_version_str}"])
9191

92-
def __init__(self, constraints: Iterable[str | Requirement] = ()) -> None:
92+
def __new__(cls, constraints: Iterable[str | Requirement] = ()) -> InterpreterConstraints:
9393
# #12578 `parse_constraint` will sort the requirement's component constraints into a stable form.
9494
# We need to sort the component constraints for each requirement _before_ sorting the entire list
9595
# for the ordering to be correct.
9696
parsed_constraints = (
9797
i if isinstance(i, Requirement) else parse_constraint(i) for i in constraints
9898
)
99-
super().__init__(sorted(parsed_constraints, key=lambda c: str(c)))
99+
return super().__new__(cls, sorted(parsed_constraints, key=lambda c: str(c)))
100100

101101
def __str__(self) -> str:
102102
return " OR ".join(str(constraint) for constraint in self)

src/python/pants/backend/python/util_rules/pex.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,6 @@ class CompletePlatforms(DeduplicatedCollection[str]):
133133
sort_input = True
134134

135135
def __init__(self, iterable: Iterable[str] = (), *, digest: Digest = EMPTY_DIGEST):
136-
super().__init__(iterable)
137136
self._digest = digest
138137

139138
@classmethod

src/python/pants/engine/collection.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,11 @@ class Examples(DeduplicatedCollection[Example]):
7979

8080
sort_input: ClassVar[bool] = False
8181

82-
def __init__(self, iterable: Iterable[T] = ()) -> None:
83-
super().__init__(
84-
iterable if not self.sort_input else sorted(iterable) # type: ignore[type-var]
82+
def __new__(cls, iterable: Iterable[T] = (), **_kwargs: object) -> DeduplicatedCollection[T]:
83+
return super().__new__(
84+
cls,
85+
iterable if not cls.sort_input else sorted(iterable), # type: ignore[type-var]
8586
)
8687

8788
def __repr__(self) -> str:
88-
return f"{self.__class__.__name__}({list(self._items)})"
89+
return f"{self.__class__.__name__}({list(self)})"

src/python/pants/engine/internals/native_engine.pyi

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66

77
from __future__ import annotations
88

9-
from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
9+
from collections.abc import Callable, Hashable, Iterable, Iterator, Mapping, Sequence
1010
from datetime import datetime
1111
from io import RawIOBase
1212
from pathlib import Path
13-
from typing import Any, ClassVar, Protocol, Self, TextIO, TypeVar, overload
13+
from typing import AbstractSet, Any, ClassVar, Protocol, Self, TextIO, TypeVar, overload
1414

1515
from pants.engine.fs import (
1616
CreateDigest,
@@ -81,6 +81,35 @@ class FrozenDict(Mapping[K, V]):
8181
def __hash__(self) -> int: ...
8282
def __repr__(self) -> str: ...
8383

84+
T_co = TypeVar("T_co", covariant=True)
85+
86+
class FrozenOrderedSet(AbstractSet[T_co], Hashable):
87+
"""A frozen (i.e. immutable) ordered set backed by Rust.
88+
89+
This is safe to use with the V2 engine.
90+
"""
91+
92+
def __new__(cls, iterable: Iterable[T_co] | None = None) -> Self: ...
93+
def __len__(self) -> int: ...
94+
def __contains__(self, key: Any) -> bool: ...
95+
def __iter__(self) -> Iterator[T_co]: ...
96+
def __reversed__(self) -> Iterator[T_co]: ...
97+
def __hash__(self) -> int: ...
98+
def __eq__(self, other: Any) -> bool: ...
99+
def __or__(self, other: Iterable[T_co]) -> FrozenOrderedSet[T_co]: ... # type: ignore[override] # widens from AbstractSet
100+
def __and__(self, other: Iterable[T_co]) -> FrozenOrderedSet[T_co]: ...
101+
def __sub__(self, other: Iterable[T_co]) -> FrozenOrderedSet[T_co]: ...
102+
def __xor__(self, other: Iterable[T_co]) -> FrozenOrderedSet[T_co]: ... # type: ignore[override] # widens from AbstractSet
103+
def __bool__(self) -> bool: ...
104+
def __repr__(self) -> str: ...
105+
def union(self, *others: Iterable[T_co]) -> FrozenOrderedSet[T_co]: ...
106+
def intersection(self, *others: Iterable[T_co]) -> FrozenOrderedSet[T_co]: ...
107+
def difference(self, *others: Iterable[T_co]) -> FrozenOrderedSet[T_co]: ...
108+
def symmetric_difference(self, other: Iterable[T_co]) -> FrozenOrderedSet[T_co]: ...
109+
def issubset(self, other: Iterable[T_co]) -> bool: ...
110+
def issuperset(self, other: Iterable[T_co]) -> bool: ...
111+
def isdisjoint(self, other: Iterable[T_co]) -> bool: ...
112+
84113
# ------------------------------------------------------------------------------
85114
# Address
86115
# ------------------------------------------------------------------------------

src/python/pants/util/ordered_set.py

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@
1515
from __future__ import annotations
1616

1717
import itertools
18-
from collections.abc import Hashable, Iterable, Iterator, MutableSet
18+
from collections.abc import Iterable, Iterator, MutableSet
1919
from typing import AbstractSet, Any, TypeVar, cast
2020

21+
from pants.engine.internals.native_engine import FrozenOrderedSet as FrozenOrderedSet # noqa: F401
22+
2123
T = TypeVar("T")
2224
T_co = TypeVar("T_co", covariant=True)
2325
_TAbstractOrderedSet = TypeVar("_TAbstractOrderedSet", bound="_AbstractOrderedSet")
@@ -195,21 +197,3 @@ def symmetric_difference_update(self, other: Iterable[T]) -> None:
195197
self._items = {item: None for item in self._items.keys() if item not in items_to_remove}
196198
for item in items_to_add:
197199
self._items[item] = None
198-
199-
200-
class FrozenOrderedSet(_AbstractOrderedSet[T_co], Hashable): # type: ignore[type-var]
201-
"""A frozen (i.e. immutable) set that retains its order.
202-
203-
This is safe to use with the V2 engine.
204-
"""
205-
206-
def __init__(self, iterable: Iterable[T_co] | None = None) -> None:
207-
super().__init__(iterable)
208-
self.__hash: int | None = None
209-
210-
def __hash__(self) -> int:
211-
if self.__hash is None:
212-
self.__hash = 0
213-
for item in self._items.keys():
214-
self.__hash ^= hash(item)
215-
return self.__hash
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
// Copyright 2026 Pants project contributors (see CONTRIBUTORS.md).
2+
// Licensed under the Apache License, Version 2.0 (see LICENSE).
3+
4+
use std::fmt::Debug;
5+
use std::sync::OnceLock;
6+
7+
use pyo3::prelude::*;
8+
use pyo3::types::{PyDict, PyIterator};
9+
10+
pub trait HashCache: Debug + Send + Sync {
11+
fn new_eager(hash: isize) -> Self;
12+
fn new_lazy() -> Self;
13+
fn get(
14+
&self,
15+
dict: &Bound<PyDict>,
16+
compute: fn(&Bound<PyDict>) -> PyResult<isize>,
17+
) -> PyResult<isize>;
18+
}
19+
20+
#[derive(Debug)]
21+
pub struct EagerHash(isize);
22+
23+
impl HashCache for EagerHash {
24+
fn new_eager(hash: isize) -> Self {
25+
Self(hash)
26+
}
27+
fn new_lazy() -> Self {
28+
panic!("EagerHash requires a value at construction")
29+
}
30+
fn get(
31+
&self,
32+
_dict: &Bound<PyDict>,
33+
_compute: fn(&Bound<PyDict>) -> PyResult<isize>,
34+
) -> PyResult<isize> {
35+
Ok(self.0)
36+
}
37+
}
38+
39+
#[derive(Debug)]
40+
pub struct LazyHash(OnceLock<isize>);
41+
42+
impl HashCache for LazyHash {
43+
fn new_eager(hash: isize) -> Self {
44+
let lock = OnceLock::new();
45+
let _ = lock.set(hash);
46+
Self(lock)
47+
}
48+
fn new_lazy() -> Self {
49+
Self(OnceLock::new())
50+
}
51+
fn get(
52+
&self,
53+
dict: &Bound<PyDict>,
54+
compute: fn(&Bound<PyDict>) -> PyResult<isize>,
55+
) -> PyResult<isize> {
56+
if let Some(&h) = self.0.get() {
57+
return Ok(h);
58+
}
59+
let h = compute(dict)?;
60+
let _ = self.0.set(h);
61+
Ok(h)
62+
}
63+
}
64+
65+
#[derive(Debug)]
66+
pub struct FrozenCollectionData<H: HashCache = EagerHash> {
67+
pub data: Py<PyDict>,
68+
hash: H,
69+
}
70+
71+
impl<H: HashCache> FrozenCollectionData<H> {
72+
pub fn new(dict: Bound<PyDict>, hash: isize) -> Self {
73+
Self {
74+
data: dict.unbind(),
75+
hash: H::new_eager(hash),
76+
}
77+
}
78+
79+
pub fn new_lazy(dict: Bound<PyDict>) -> Self {
80+
Self {
81+
data: dict.unbind(),
82+
hash: H::new_lazy(),
83+
}
84+
}
85+
86+
pub fn get_hash(
87+
&self,
88+
py: Python,
89+
compute: fn(&Bound<PyDict>) -> PyResult<isize>,
90+
) -> PyResult<isize> {
91+
self.hash.get(&self.data.bind_borrowed(py), compute)
92+
}
93+
94+
pub fn len(&self, py: Python) -> usize {
95+
self.data.bind_borrowed(py).len()
96+
}
97+
98+
pub fn contains(&self, key: &Bound<PyAny>) -> PyResult<bool> {
99+
self.data.bind_borrowed(key.py()).contains(key)
100+
}
101+
102+
pub fn iter<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyIterator>> {
103+
self.data.as_any().bind_borrowed(py).try_iter()
104+
}
105+
106+
pub fn reversed<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyIterator>> {
107+
let keys = self.data.bind_borrowed(py).keys();
108+
keys.reverse()?;
109+
keys.try_iter()
110+
}
111+
}
112+
113+
/// XORs together the Python hashes of every key in `dict`.
///
/// Starts from zero, so an empty dict hashes to `0`.
///
/// # Errors
///
/// Stops at, and returns, the first error raised while hashing a key.
pub fn xor_hash_keys(dict: &Bound<PyDict>) -> PyResult<isize> {
    dict.keys()
        .into_iter()
        .try_fold(0_isize, |acc, key| key.hash().map(|key_hash| acc ^ key_hash))
}

0 commit comments

Comments
 (0)