# Reconstructed from the added lines of this patch:
#   * features/counter.py (new file): gc-based diagnostics for ifcopenshell
#     wrapper objects
#   * features/steps/utils/misc.py: new imports, the TNum alias and
#     to_numeric_id
# Unchanged context lines of misc.py are not repeated here.
import gc
from numbers import Real
from types import ModuleType
from typing import Iterable, Iterator, Optional, Union

import numpy as np

# Scalar types that callers may hand to the numeric id containers.
TNum = Union[int, float, np.number]


def is_ifc_entity(o):
    """Return True when *o* is an instance of a type named ``entity_instance``
    whose ``__module__`` starts with ``"ifcopenshell"``.

    The test is purely name based, so it does not need ifcopenshell to be
    importable. The previous version additionally called ``__import__`` on the
    type's top-level package; that could only succeed (adding nothing to the
    check) or raise ``ImportError`` and abort a whole gc scan, so it is gone.
    """
    t = type(o)
    module_name = getattr(t, "__module__", None)
    return (
        t.__name__ == "entity_instance"
        and isinstance(module_name, str)
        and module_name.startswith("ifcopenshell")
    )


def count_entity_instances():
    """Run a garbage collection, then count the gc-tracked objects for which
    :func:`is_ifc_entity` is true."""
    gc.collect()
    return sum(1 for o in gc.get_objects() if is_ifc_entity(o))


def seen_in_gc(objs):
    """Sanity check: return how many of *objs* show up in ``gc.get_objects()``.

    Objects are matched by identity (``id``) after a collection pass, so
    objects the collector does not track are not counted.
    """
    gc.collect()
    ids_in_gc = {id(o) for o in gc.get_objects()}
    return sum(1 for o in objs if id(o) in ids_in_gc)


def to_numeric_id(r):
    """Return ``r.instance_id`` reduced to a plain id.

    When ``instance_id`` is an ``ifcopenshell.entity_instance`` its ``id()`` is
    returned; any other value is passed through unchanged.
    """
    # NOTE(review): no `import ifcopenshell` is visible in this part of the
    # patch -- confirm the module imports it, otherwise this raises NameError.
    inst = r.instance_id
    # TODO: add ifcopenshell.rocksdb_lazy_instance too?
    return inst.id() if isinstance(inst, ifcopenshell.entity_instance) else inst
class ContiguousSet:
    """
    A set-like container for numeric values backed by:
      - a sorted, contiguous NumPy array (committed data) for memory compactness
      - a Python set for pending inserts (amortized commits)

    Features
    --------
    - Only accepts numeric (int/float/np.number); rejects bool and non-finite
      values (NaN/Inf). With an integer dtype, values that are not whole
      numbers or fall outside the dtype's range are rejected as well, because
      they could not be stored without changing their value.
    - Membership: binary search on the array plus a hash lookup in the
      pending set.
    - Iteration yields sorted ascending order without forcing commit, always
      as plain Python scalars.
    - No remove/discard support
    - `commit()` merges pending into the array; auto-commit when `pending_max`
      is reached.
    """

    __slots__ = ("_arr", "_pending", "_dtype", "_pending_max")

    def __init__(
        self,
        data: Optional[Iterable["TNum"]] = None,
        *,
        dtype: np.dtype = np.int64,
        pending_max: int = 1024,
    ) -> None:
        """
        Parameters
        ----------
        data : optional iterable of numeric
            Initial values (deduplicated); they are committed immediately.
        dtype : NumPy dtype, default int64
            Storage dtype for the NumPy array.
        pending_max : int, default 1024
            Auto-commit threshold for number of pending items.
        """
        self._dtype = np.dtype(dtype)
        self._pending_max = int(pending_max)
        self._arr = np.empty(0, dtype=self._dtype)
        self._pending = set()
        if data is not None:
            self.update(data)
            self.commit()

    @staticmethod
    def _is_number(x) -> bool:
        """True for real numbers; bool is excluded although it subclasses int."""
        return isinstance(x, Real) and not isinstance(x, bool)

    def _check_numeric(self, x):
        """Validate *x*.

        Raises
        ------
        TypeError
            If *x* is not a real number (or is a bool).
        ValueError
            If *x* is NaN/Inf, or cannot be stored exactly in an integer dtype.
        """
        if not self._is_number(x):
            raise TypeError(f"Only numeric (int/float) values allowed, got {type(x).__name__}")
        # Reject NaN/Inf — they break equality/ordering semantics
        if not np.isfinite(x):
            raise ValueError("Values must be finite (no NaN/Inf).")
        if self._dtype.kind in "iu":
            # commit() would otherwise coerce such values into the array,
            # so the stored value would no longer equal what was added.
            as_int = int(x)
            bounds = np.iinfo(self._dtype)
            if x != as_int or not (int(bounds.min) <= as_int <= int(bounds.max)):
                raise ValueError(f"Value {x!r} cannot be stored exactly with dtype {self._dtype}.")

    def _normalize(self, x):
        """Validate *x* and return it as a plain Python scalar for storage."""
        self._check_numeric(x)
        if self._dtype.kind in "iu":
            return int(x)
        return x.item() if isinstance(x, np.generic) else x

    def _in_array(self, x: "TNum") -> bool:
        """Binary-search the committed array for *x*."""
        if self._arr.size == 0:
            return False
        idx = np.searchsorted(self._arr, x, side="left")
        return bool(idx < self._arr.size and self._arr[idx] == x)

    def _maybe_autocommit(self) -> None:
        if len(self._pending) >= self._pending_max:
            self.commit()

    def add(self, x: "TNum") -> None:
        """Add a numeric value. No effect if it's already present."""
        x = self._normalize(x)
        if x in self._pending or self._in_array(x):
            return
        self._pending.add(x)
        self._maybe_autocommit()

    def update(self, values: Iterable["TNum"]) -> None:
        """Add many values.

        All values are validated before any is stored, so a failing value
        leaves the set unchanged. A commit happens only if the auto-commit
        threshold is reached afterwards.
        """
        fresh = set()
        for v in values:
            v = self._normalize(v)
            if v in self._pending or self._in_array(v):
                continue
            fresh.add(v)
        if fresh:
            self._pending.update(fresh)
            self._maybe_autocommit()

    def commit(self) -> None:
        """Merge pending items into the contiguous sorted NumPy array."""
        if not self._pending:
            return
        pend_arr = np.fromiter(self._pending, dtype=self._dtype, count=len(self._pending))
        if self._arr.size == 0:
            merged = np.unique(pend_arr)
        else:
            merged = np.union1d(self._arr, pend_arr)  # sorted, unique
        self._arr = np.ascontiguousarray(merged.astype(self._dtype, copy=False))
        self._pending.clear()

    def __contains__(self, x: object) -> bool:
        if not self._is_number(x):
            return False
        return x in self._pending or self._in_array(x)

    def __len__(self) -> int:
        # pending items are maintained disjoint from the array
        return int(self._arr.size + len(self._pending))

    def __iter__(self) -> Iterator["TNum"]:
        """Iterate in ascending order without forcing a commit.

        Values are always plain Python scalars, whether or not items are
        pending, so ``isinstance(value, int)`` checks behave the same.
        """
        committed = self._arr.tolist()
        if not self._pending:
            # Fast path: just yield the array
            yield from committed
            return

        queued = sorted(self._pending)
        i = j = 0
        n, m = len(committed), len(queued)
        while i < n and j < m:
            a, b = committed[i], queued[j]
            if a < b:
                yield a
                i += 1
            elif b < a:
                yield b
                j += 1
            else:
                # Should not happen (disjoint invariant); emit once anyway.
                yield a
                i += 1
                j += 1
        yield from committed[i:]
        yield from queued[j:]

    def __repr__(self) -> str:
        cls = self.__class__.__name__
        preview = list(self.__iter__())
        return f"{cls}({preview!r}, dtype={self._dtype!r}, pending={len(self._pending)})"

    def clear(self) -> None:
        """Remove all items (both committed and pending)."""
        self._arr = np.empty(0, dtype=self._dtype)
        self._pending.clear()

    def copy(self) -> "ContiguousSet":
        """Shallow copy with copies of array & pending set."""
        new = ContiguousSet(dtype=self._dtype, pending_max=self._pending_max)
        new._arr = self._arr.copy()
        new._pending = set(self._pending)
        return new

    def isdisjoint(self, other: Iterable["TNum"]) -> bool:
        """True when no numeric element of *other* is in this set."""
        s_other = {x for x in other if self._is_number(x)}
        if self._pending.intersection(s_other):
            return False
        return not any(self._in_array(x) for x in s_other)

    def issubset(self, other: Iterable["TNum"]) -> bool:
        """True when every element of this set occurs in *other*."""
        s_other = {x for x in other if self._is_number(x)}
        if not self._pending.issubset(s_other):
            return False
        return all(x in s_other for x in self._arr.tolist())

    def union(self, other: Iterable["TNum"]) -> "ContiguousSet":
        """Return a new set holding the elements of this set and of *other*."""
        out = self.copy()
        out.update(other)
        return out

    def to_numpy(self, *, commit: bool = False) -> np.ndarray:
        """Return the elements as a sorted array.

        With ``commit=True`` pending items are merged first and the internal
        array itself is returned (no copy); callers must not modify it.
        Otherwise a new array is returned and the set is left untouched.
        """
        if commit:
            self.commit()
            return self._arr
        if not self._pending:
            return self._arr.copy()
        merged = np.fromiter(self.__iter__(), dtype=self._dtype, count=len(self))
        return np.ascontiguousarray(merged)

    @property
    def dtype(self) -> np.dtype:
        return self._dtype

    @staticmethod
    def _resolve_if_id(model, x):
        """Return ``model.by_id(x)`` when *x* is an integer id, else *x* unchanged.

        Declared static so it works both as ``ContiguousSet._resolve_if_id(...)``
        and on an instance. NumPy integers are treated as ids too; bool is not.
        """
        if isinstance(x, (int, np.integer)) and not isinstance(x, bool):
            return model.by_id(int(x))
        return x

    @property
    def pending_size(self) -> int:
        return len(self._pending)

    @property
    def pending_max(self) -> int:
        return self._pending_max

    @pending_max.setter
    def pending_max(self, value: int) -> None:
        self._pending_max = int(value)


import gc
from types import ModuleType


def is_ifc_entity(o):
    """Return True when *o* is an instance of a type named ``entity_instance``
    whose ``__module__`` starts with ``"ifcopenshell"``.

    Purely name based: the earlier ``__import__`` of the type's top-level
    package could only succeed (adding nothing) or raise ``ImportError`` in the
    middle of a gc scan, so it has been removed.
    """
    t = type(o)
    module_name = getattr(t, "__module__", None)
    return (
        t.__name__ == "entity_instance"
        and isinstance(module_name, str)
        and module_name.startswith("ifcopenshell")
    )


def count_entity_instances():
    """Run a garbage collection, then count the gc-tracked objects for which
    :func:`is_ifc_entity` is true."""
    gc.collect()
    return sum(1 for o in gc.get_objects() if is_ifc_entity(o))

# sanity: do these wrappers appear in gc.get_objects?
+def seen_in_gc(objs): + gc.collect() + ids_in_gc = {id(o) for o in gc.get_objects()} + return sum(1 for o in objs if id(o) in ids_in_gc) \ No newline at end of file diff --git a/features/steps/validation_handling.py b/features/steps/validation_handling.py index 0e61649d6..0e0da1e61 100644 --- a/features/steps/validation_handling.py +++ b/features/steps/validation_handling.py @@ -125,22 +125,32 @@ def handle_given(context, fn, **kwargs): 2) Set an initial set of instances ('Given an IfcAlignment' -> [IfcAlignm, IfcAlignm, IfcAlign]) 3) Filter the set of IfcAlignment based on a value ('Given attribute == X' -> [IfcAlignm, None, IfcAlignm]) 4) Set instances to a given attribute ('Given its attribute Representation') -> [IfcProdDefShape, IfcProdDefShape, IfcProdDefShape] - """ + """ if 'inst' not in inspect.getargs(fn.__code__).args: gen = fn(context, **kwargs) if gen: # (2) Set initial set of instances - insts = list(gen) - context.instances = list(map(attrgetter('instance_id'), filter(lambda res: res.severity == OutcomeSeverity.PASSED, insts))) + + ids = misc.ContiguousSet( + map(misc.to_numeric_id, filter(lambda r: r.severity == OutcomeSeverity.PASSED, gen)) + ) + ids.commit() + context.instances = ids pass else: pass # (1) -> context.applicable is set within the function ; replace this with a simple True/False and set applicability here? 
else: context._push('attribute') # for attribute stacking depth = next(map(int, re.findall(r'at depth (\d+)$', context.step.name)), None) + resolved = list(map(lambda x: misc.ContiguousSet._resolve_if_id(context.model, x), context.instances)) if depth is not None: - context.instances = list(filter(None, map_given_state(context.instances, fn, context, depth=depth, **kwargs))) + context.instances = list(filter(None, map_given_state(resolved, fn, context, depth=depth, **kwargs))) else: - context.instances = map_given_state(context.instances, fn, context, **kwargs) + ids = misc.ContiguousSet( + inst.id() if isinstance(inst := r, ifcopenshell.entity_instance) else inst + for r in map_given_state(resolved, fn, context, **kwargs) + ) + ids.commit() + context.instances = ids def apply_operation(fn, inst, context, **kwargs):