de #450

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft

Ghesselink wants to merge 1 commit into development from gh_performance_experimentation

Draft

de #450

features/counter.py

-Original file line number
+Diff line change
@@ -0,0 +1,28 @@
+    # import gc
+    # def count_entity_instances():
+    #     gc.collect()
+    #     return sum(1 for o in gc.get_objects()
+    #                if type(o).__name__ == "entity_instance"
+    #                and "ifcopenshell" in type(o).__module__)
+    import gc
+    from types import ModuleType
def is_ifc_entity(o):
    """Return True when *o* looks like an ifcopenshell ``entity_instance`` wrapper.

    The check is deliberately loose (class name + module prefix) so that it
    does not need to import ifcopenshell itself.

    Parameters
    ----------
    o : object
        Any Python object, typically one taken from ``gc.get_objects()``.

    Returns
    -------
    bool
        True if the type is named ``entity_instance``, its ``__module__``
        starts with ``"ifcopenshell"`` and the top-level package of that
        module is currently loaded; False otherwise.
    """
    import sys  # local import: keeps the block self-contained

    t = type(o)
    if t.__name__ != "entity_instance":
        return False

    module_name = t.__module__
    # Check the prefix *before* touching the import system, so unrelated
    # classes that merely share the name can never trigger an import (or an
    # ImportError) while scanning every object tracked by the GC.
    if not isinstance(module_name, str) or not module_name.startswith("ifcopenshell"):
        return False

    # module exists: look it up instead of importing it as a side effect
    return isinstance(sys.modules.get(module_name.split('.')[0]), ModuleType)
def count_entity_instances():
    """Count the GC-tracked objects for which ``is_ifc_entity`` returns True.

    A full garbage collection is run first so that unreachable objects are
    not included in the count.
    """
    gc.collect()
    total = 0
    for candidate in gc.get_objects():
        if is_ifc_entity(candidate):
            total += 1
    return total
+    # sanity: do these wrappers appear in gc.get_objects?
def seen_in_gc(objs):
    """Return how many of *objs* are present in ``gc.get_objects()``.

    Objects are matched by identity (``id``) after a full collection.
    """
    gc.collect()
    tracked_ids = set(map(id, gc.get_objects()))
    hits = 0
    for candidate in objs:
        if id(candidate) in tracked_ids:
            hits += 1
    return hits

features/steps/utils/misc.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,9 @@ @@
     import operator
     import pyparsing
+    from typing import Iterable, Iterator, Union, Optional
+    import numpy as np
+    from numbers import Real
     def reverse_operands(fn):
         """
@@ Expand Down Expand Up / @@ -190,3 +193,231 @@ def get_stack_tree(context): @@
         """Returns the stack tree of the current context. To be used for 'attribute stacking', e.g. in GEM004"""
         return list(
             filter(None, list(map(lambda layer: layer.get('instances'), context._stack))))
+    TNum = Union[int, float, np.number]
def to_numeric_id(r):
    """Return the numeric id behind ``r.instance_id``.

    When ``r.instance_id`` is an ``ifcopenshell.entity_instance`` its ``id()``
    is returned; any other value is passed through unchanged.
    """
    inst = r.instance_id
    if isinstance(inst, ifcopenshell.entity_instance):
        return inst.id()
    # add ifcopenshell.rocksdb_lazy_instance too?
    return inst
class ContiguousSet:
    """
    A set-like container for numeric values backed by:
      - a sorted, contiguous NumPy array (committed data) for memory compactness
      - a Python set for pending inserts (amortized commits)

    Features
    --------
    - Only accepts numeric (int/float/np.number); rejects bool and non-finite values (NaN/Inf).
    - With an integer storage dtype, non-integral values are rejected instead of
      being cast to an integer when the pending set is committed.
    - Membership: hash lookup in the pending set, binary search on the array.
    - Iteration yields ascending order without forcing a commit; committed
      values and NumPy scalars are yielded as plain Python scalars.
    - No remove/discard support.
    - `commit()` merges pending into the array; auto-commit when `pending_max` is reached.
    """

    __slots__ = ("_arr", "_pending", "_dtype", "_pending_max")

    def __init__(
        self,
        data: Optional[Iterable["TNum"]] = None,
        *,
        dtype: np.dtype = np.int64,
        pending_max: int = 1024,
    ) -> None:
        """
        Parameters
        ----------
        data : optional iterable of numeric
            Initial values (deduplicated); committed immediately.
        dtype : NumPy dtype, default int64
            Storage dtype for the NumPy array.
        pending_max : int, default 1024
            Auto-commit threshold for number of pending items.
        """
        self._dtype = np.dtype(dtype)
        self._pending_max = int(pending_max)
        self._arr = np.ascontiguousarray(np.array([], dtype=self._dtype))
        self._pending: set = set()
        if data is not None:
            self.update(data)
            self.commit()

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _is_number(x) -> bool:
        """Return True for real numbers; bool is explicitly excluded."""
        return isinstance(x, Real) and not isinstance(x, bool)

    @staticmethod
    def _to_python(x):
        """Convert a NumPy scalar to its Python equivalent; pass anything else through."""
        return x.item() if isinstance(x, np.generic) else x

    def _check_numeric(self, x) -> None:
        """Validate *x* for insertion.

        Raises
        ------
        TypeError
            If *x* is not a real number (or is a bool).
        ValueError
            If *x* is NaN/Inf, or is non-integral while the storage dtype is
            an integer type.
        """
        if not self._is_number(x):
            raise TypeError(f"Only numeric (int/float) values allowed, got {type(x).__name__}")
        # Reject NaN/Inf — they break equality/ordering semantics
        if not np.isfinite(x):
            raise ValueError("Values must be finite (no NaN/Inf).")
        # Casting e.g. 1.5 into an int64 array on commit would change the
        # stored value and could collide with an existing member, breaking
        # the "pending is disjoint from the array" invariant used by __len__.
        if self._dtype.kind in "iu" and x != int(x):
            raise ValueError(
                f"Non-integral value {x!r} cannot be stored with integer dtype {self._dtype}."
            )

    def _in_array(self, x: "TNum") -> bool:
        """Binary-search the committed array for *x*."""
        arr = self._arr
        if arr.size == 0:
            return False
        idx = int(np.searchsorted(arr, x, side="left"))
        return idx < arr.size and bool(arr[idx] == x)

    def _maybe_autocommit(self) -> None:
        """Commit once the pending set has reached `pending_max` items."""
        if len(self._pending) >= self._pending_max:
            self.commit()

    def _merged_array(self) -> np.ndarray:
        """Return a new sorted, unique, contiguous array of committed + pending values."""
        pend_arr = np.fromiter(self._pending, dtype=self._dtype, count=len(self._pending))
        merged = np.union1d(self._arr, pend_arr)  # sorted, unique
        return np.ascontiguousarray(merged.astype(self._dtype, copy=False))

    # ----------------------------------------------------------------- mutation

    def add(self, x: "TNum") -> None:
        """Add a numeric value. No effect if it's already present."""
        self._check_numeric(x)
        x = self._to_python(x)
        if x in self._pending or self._in_array(x):
            return
        self._pending.add(x)
        self._maybe_autocommit()

    def update(self, values: Iterable["TNum"]) -> None:
        """Add many values (does not force commit).

        All values are validated before any of them is stored, so an invalid
        value leaves the set unchanged.
        """
        new_items = []
        for v in values:
            self._check_numeric(v)
            v = self._to_python(v)
            if (v in self._pending) or self._in_array(v):
                continue
            new_items.append(v)
        if new_items:
            self._pending.update(new_items)
            self._maybe_autocommit()

    def commit(self) -> None:
        """Merge pending items into the contiguous sorted NumPy array."""
        if not self._pending:
            return
        self._arr = self._merged_array()
        self._pending.clear()

    def clear(self) -> None:
        """Remove all items (both committed and pending)."""
        self._arr = np.ascontiguousarray(np.array([], dtype=self._dtype))
        self._pending.clear()

    # ----------------------------------------------------------------- protocol

    def __contains__(self, x: object) -> bool:
        if not self._is_number(x):
            return False
        return (x in self._pending) or self._in_array(x)

    def __len__(self) -> int:
        # pending items are maintained disjoint from the array
        return int(self._arr.size + len(self._pending))

    def __iter__(self) -> Iterator["TNum"]:
        """Iterate in ascending order without forcing a commit.

        Committed values are converted with ``ndarray.tolist()`` on both the
        fast path and the merge path, so callers that test
        ``isinstance(x, int)`` see the same types whether or not inserts
        are pending.
        """
        committed = self._arr.tolist()
        if not self._pending:
            # Fast path: just yield the array
            yield from committed
            return
        pend_sorted = sorted(self._pending)
        i = j = 0
        n, m = len(committed), len(pend_sorted)
        while i < n and j < m:
            ai = committed[i]
            bj = pend_sorted[j]
            if ai < bj:
                yield ai
                i += 1
            elif bj < ai:
                yield bj
                j += 1
            else:
                # Equal values: emit once and advance both sides.
                yield ai
                i += 1
                j += 1
        yield from committed[i:]
        yield from pend_sorted[j:]

    def __repr__(self) -> str:
        cls = self.__class__.__name__
        preview = list(self)
        return f"{cls}({preview!r}, dtype={self._dtype!r}, pending={len(self._pending)})"

    # ---------------------------------------------------------------- set-like

    def copy(self) -> "ContiguousSet":
        """Shallow copy with copies of array & pending set."""
        new = ContiguousSet(dtype=self._dtype, pending_max=self._pending_max)
        new._arr = self._arr.copy()
        new._pending = set(self._pending)
        return new

    def isdisjoint(self, other: Iterable["TNum"]) -> bool:
        """Return True if no numeric element of *other* is in this set."""
        s_other = {x for x in other if self._is_number(x)}
        if self._pending.intersection(s_other):
            return False
        return not any(self._in_array(x) for x in s_other)

    def issubset(self, other: Iterable["TNum"]) -> bool:
        """Return True if every element of this set is a numeric element of *other*."""
        s_other = {x for x in other if self._is_number(x)}
        if not self._pending.issubset(s_other):
            return False
        return all(x in s_other for x in self._arr.tolist())

    def union(self, other: Iterable["TNum"]) -> "ContiguousSet":
        """Return a new set holding the elements of this set and of *other*."""
        out = self.copy()
        out.update(other)
        return out

    def to_numpy(self, *, commit: bool = False) -> np.ndarray:
        """Return the contents as a sorted NumPy array.

        With ``commit=True`` pending items are merged first and the internal
        array itself is returned (not a copy). Otherwise a new array is
        returned and the pending set is left untouched.
        """
        if commit:
            self.commit()
            return self._arr
        if not self._pending:
            return self._arr.copy()
        return self._merged_array()

    @property
    def dtype(self) -> np.dtype:
        """Storage dtype of the committed array."""
        return self._dtype

    @staticmethod
    def _resolve_if_id(model, x):
        """Return ``model.by_id(x)`` when *x* is a non-bool int, else *x* unchanged.

        Declared static so it behaves the same whether it is reached through
        the class or through an instance.
        """
        if isinstance(x, int) and not isinstance(x, bool):
            return model.by_id(x)
        return x

    @property
    def pending_size(self) -> int:
        """Number of items waiting to be committed."""
        return len(self._pending)

    @property
    def pending_max(self) -> int:
        """Auto-commit threshold for the pending set."""
        return self._pending_max

    @pending_max.setter
    def pending_max(self, value: int) -> None:
        self._pending_max = int(value)
+    import gc
+    from types import ModuleType
def is_ifc_entity(o):
    """Return True when *o* looks like an ifcopenshell ``entity_instance`` wrapper.

    The check is deliberately loose (class name + module prefix) so that it
    does not need to import ifcopenshell itself.

    Parameters
    ----------
    o : object
        Any Python object, typically one taken from ``gc.get_objects()``.

    Returns
    -------
    bool
        True if the type is named ``entity_instance``, its ``__module__``
        starts with ``"ifcopenshell"`` and the top-level package of that
        module is currently loaded; False otherwise.
    """
    import sys  # local import: keeps the block self-contained

    t = type(o)
    if t.__name__ != "entity_instance":
        return False

    module_name = t.__module__
    # Check the prefix *before* touching the import system, so unrelated
    # classes that merely share the name can never trigger an import (or an
    # ImportError) while scanning every object tracked by the GC.
    if not isinstance(module_name, str) or not module_name.startswith("ifcopenshell"):
        return False

    # module exists: look it up instead of importing it as a side effect
    return isinstance(sys.modules.get(module_name.split('.')[0]), ModuleType)
def count_entity_instances():
    """Count the GC-tracked objects for which ``is_ifc_entity`` returns True.

    A full garbage collection is run first so that unreachable objects are
    not included in the count.
    """
    gc.collect()
    total = 0
    for candidate in gc.get_objects():
        if is_ifc_entity(candidate):
            total += 1
    return total
+    # sanity: do these wrappers appear in gc.get_objects?
def seen_in_gc(objs):
    """Return how many of *objs* are present in ``gc.get_objects()``.

    Objects are matched by identity (``id``) after a full collection.
    """
    gc.collect()
    tracked_ids = set(map(id, gc.get_objects()))
    hits = 0
    for candidate in objs:
        if id(candidate) in tracked_ids:
            hits += 1
    return hits

features/steps/validation_handling.py

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -125,22 +125,32 @@ def handle_given(context, fn, **kwargs):
  
        2) Set an initial set of instances ('Given an IfcAlignment' -> [IfcAlignm, IfcAlignm, IfcAlign])

        3) Filter the set of IfcAlignment based on a value ('Given attribute == X' -> [IfcAlignm, None, IfcAlignm])

        4) Set instances to a given attribute ('Given its attribute Representation') -> [IfcProdDefShape, IfcProdDefShape, IfcProdDefShape]

        """

        """  

        if 'inst' not in inspect.getargs(fn.__code__).args:

            gen = fn(context, **kwargs)

            if gen: # (2) Set initial set of instances

                insts = list(gen)

                context.instances = list(map(attrgetter('instance_id'), filter(lambda res: res.severity == OutcomeSeverity.PASSED, insts)))

                ids = misc.ContiguousSet(

                    map(misc.to_numeric_id, filter(lambda r: r.severity == OutcomeSeverity.PASSED, gen))

                )

                ids.commit()

                context.instances = ids

                pass

            else:

                pass # (1) -> context.applicable is set within the function ; replace this with a simple True/False and set applicability here?

        else:

            context._push('attribute') # for attribute stacking

            depth = next(map(int, re.findall(r'at depth (\d+)$', context.step.name)), None)

            resolved = list(map(lambda x: misc.ContiguousSet._resolve_if_id(context.model, x), context.instances))

            if depth is not None:

                context.instances = list(filter(None, map_given_state(context.instances, fn, context, depth=depth, **kwargs)))

                context.instances = list(filter(None, map_given_state(resolved, fn, context, depth=depth, **kwargs)))

            else:

                context.instances = map_given_state(context.instances, fn, context, **kwargs)

                ids = misc.ContiguousSet(

                    inst.id() if isinstance(inst := r, ifcopenshell.entity_instance) else inst

                    for r in map_given_state(resolved, fn, context, **kwargs)

                )

                ids.commit()

                context.instances = ids

    def apply_operation(fn, inst, context, **kwargs):

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

de #450

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

de #450

Are you sure you want to change the base?

Uh oh!

de #450

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!