Skip to content

Commit 17d930b

Browse files
Speed up the loading of the entities by using set and also reduce complexity with supplied known hash set (#1316) (#1320)
* Speed up the loading of the entities by using set and also reduce complexity with supplied known hash set * Remove description * Ready * Fix lint
1 parent 16bca9a commit 17d930b

File tree

6 files changed

+609
-37
lines changed

6 files changed

+609
-37
lines changed

flow360/component/simulation/entity_info.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,9 @@ def group_in_registry(
142142
Group items with given attribute_name.
143143
"""
144144
entity_list = self._get_list_of_entities(attribute_name, entity_type_name)
145+
known_frozen_hashes = set()
145146
for item in entity_list:
146-
registry.register(item)
147+
known_frozen_hashes = registry.fast_register(item, known_frozen_hashes)
147148
return registry
148149

149150
def _get_list_of_entities(
@@ -516,14 +517,16 @@ def get_registry(self, internal_registry, **_) -> EntityRegistry:
516517
internal_registry = EntityRegistry()
517518

518519
# Populate boundaries
520+
known_frozen_hashes = set()
519521
# pylint: disable=not-an-iterable
520522
for boundary in self.boundaries:
521-
internal_registry.register(boundary)
523+
known_frozen_hashes = internal_registry.fast_register(boundary, known_frozen_hashes)
522524

523525
# Populate zones
524526
# pylint: disable=not-an-iterable
527+
known_frozen_hashes = set()
525528
for zone in self.zones:
526-
internal_registry.register(zone)
529+
known_frozen_hashes = internal_registry.fast_register(zone, known_frozen_hashes)
527530

528531
return internal_registry
529532

@@ -552,10 +555,11 @@ def get_registry(self, internal_registry, **_) -> EntityRegistry:
552555
if internal_registry is None:
553556
# Initialize the local registry
554557
internal_registry = EntityRegistry()
558+
known_frozen_hashes = set()
555559
# Populate boundaries
556560
# pylint: disable=not-an-iterable
557561
for boundary in self.boundaries:
558-
internal_registry.register(boundary)
562+
known_frozen_hashes = internal_registry.fast_register(boundary, known_frozen_hashes)
559563
return internal_registry
560564
return internal_registry
561565

flow360/component/simulation/framework/entity_base.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from __future__ import annotations
44

55
import copy
6+
import hashlib
67
import uuid
78
from abc import ABCMeta
89
from collections import defaultdict
@@ -45,6 +46,11 @@ class EntityBase(Flow360BaseModel, metaclass=ABCMeta):
4546

4647
name: str = pd.Field(frozen=True)
4748

49+
# Whether the entity is dirty and needs to be re-hashed
50+
_dirty: bool = pd.PrivateAttr(True)
51+
# Cached hash of the entity
52+
_hash_cache: str = pd.PrivateAttr(None)
53+
4854
def __init__(self, **data):
4955
"""
5056
Initializes a new entity and registers it in the global registry.
@@ -110,9 +116,34 @@ def entity_type(self, value: str):
110116
def __str__(self) -> str:
111117
return "\n".join([f" {attr}: {value}" for attr, value in self.__dict__.items()])
112118

119+
def _recompute_hash(self):
120+
new_hash = hashlib.sha256(self.model_dump_json().encode("utf-8")).hexdigest()
121+
# Can further speed up 10% by using `object.__setattr__`
122+
self._hash_cache = new_hash
123+
self._dirty = False
124+
return new_hash
125+
113126
def _get_hash(self):
114127
"""hash generator to identify if two entities are the same"""
115-
return hash(self.model_dump_json(exclude="private_attribute_id"))
128+
# Can further speed up 10% by using `object.__getattribute__`
129+
dirty = self._dirty
130+
cache = self._hash_cache
131+
if dirty or cache is None:
132+
return self._recompute_hash()
133+
return cache
134+
135+
def __setattr__(self, name, value):
136+
"""
137+
[Large model performance]
138+
Wrapping the __setattr__ to mark the entity as dirty when the attribute is not private
139+
This enables caching the hash of the entity to avoid re-calculating the hash when the entity is not changed.
140+
"""
141+
142+
super().__setattr__(name, value)
143+
if not name.startswith("_") and not self._dirty:
144+
# Assigning via self re-enters this __setattr__; the private name `_dirty` fails the check above, so it does not recurse further
145+
# Can further speed up 10% by using `object.__setattr__`
146+
self._dirty = True
116147

117148
@property
118149
def id(self) -> str:
@@ -338,8 +369,8 @@ def _get_expanded_entities(
338369
# pylint: disable=arguments-differ
339370
def preprocess(self, **kwargs):
340371
"""
341-
Expand and overwrite self.stored_entities in preparation for submissin/serialization.
342-
Should only be called as late as possible to incoperate all possible changes.
372+
Expand and overwrite self.stored_entities in preparation for submission/serialization.
373+
Should only be called as late as possible to incorporate all possible changes.
343374
"""
344375
# WARNING: this is a very expensive call for long lists, as it is quadratic
345376
self.stored_entities = self._get_expanded_entities(create_hard_copy=False)

flow360/component/simulation/framework/entity_registry.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,32 @@ class EntityRegistry(Flow360BaseModel):
4343

4444
internal_registry: Dict[str, list[Any]] = pd.Field({})
4545

46+
def fast_register(self, entity: EntityBase, known_frozen_hashes: set[str]) -> set[str]:
47+
"""
48+
Registers an entity in the registry under its type. Suitable for registering a large number of entities.
49+
50+
Parameters:
51+
entity (EntityBase): The entity instance to register.
52+
known_frozen_hashes (set[str]): A set of hashes of frozen entities.
53+
This is used to speed up checking if the hash is already in the registry by avoiding O(N^2) complexity.
54+
This can be provided when registering a large number of entities.
55+
56+
Returns:
57+
known_frozen_hashes (set[str])
58+
"""
59+
if entity.entity_bucket not in self.internal_registry:
60+
# pylint: disable=unsupported-assignment-operation
61+
self.internal_registry[entity.entity_bucket] = []
62+
63+
# pylint: disable=protected-access
64+
if entity._get_hash() in known_frozen_hashes:
65+
return known_frozen_hashes
66+
known_frozen_hashes.add(entity._get_hash())
67+
68+
# pylint: disable=unsubscriptable-object
69+
self.internal_registry[entity.entity_bucket].append(entity)
70+
return known_frozen_hashes
71+
4672
def register(self, entity: EntityBase):
4773
"""
4874
Registers an entity in the registry under its type.

flow360/component/simulation/framework/param_utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,15 +132,16 @@ def register_entity_list(model: Flow360BaseModel, registry: EntityRegistry) -> N
132132
Returns:
133133
None
134134
"""
135+
known_frozen_hashes = set()
135136
for field in model.__dict__.values():
136137
if isinstance(field, EntityBase):
137-
registry.register(field)
138+
known_frozen_hashes = registry.fast_register(field, known_frozen_hashes)
138139

139140
if isinstance(field, EntityList):
140141
# pylint: disable=protected-access
141142
expanded_entities = field._get_expanded_entities(create_hard_copy=False)
142143
for entity in expanded_entities if expanded_entities else []:
143-
registry.register(entity)
144+
known_frozen_hashes = registry.fast_register(entity, known_frozen_hashes)
144145

145146
elif isinstance(field, (list, tuple)):
146147
for item in field:

0 commit comments

Comments
 (0)