Skip to content

Commit 613e78f

Browse files
authored
Sort keys when writing the NPZ file so its output is deterministic for hashing (#84)
1 parent 6c10681 commit 613e78f

File tree

2 files changed

+52
-1
lines changed

2 files changed

+52
-1
lines changed

src/aiida_atomistic/data/structure/structure.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,8 @@ def _store_properties(self):
315315
# Save all arrays to a single compressed npz file
316316
if repository_dict:
317317
with tempfile.NamedTemporaryFile(suffix='.npz') as handle:
318-
np.savez_compressed(handle, **repository_dict)
318+
# Sort keys to ensure deterministic binary output for hashing
319+
np.savez_compressed(handle, **{k: repository_dict[k] for k in sorted(repository_dict.keys())})
319320
handle.flush()
320321
handle.seek(0)
321322

tests/data/test_structure.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,56 @@ def test_load_properties_from_npz_no_file(aiida_profile_clean):
682682
assert isinstance(props, dict)
683683

684684

685+
def test_npz_deterministic_key_order(aiida_profile_clean):
686+
"""Test that NPZ files have deterministic key ordering for stable hashing."""
687+
import numpy as np
688+
689+
# Create a structure with multiple properties that will be stored in repository
690+
structure = StructureData(
691+
cell=[[3.0, 0, 0], [0, 3.0, 0], [0, 0, 3.0]],
692+
pbc=[True, True, True],
693+
sites=[
694+
{"symbol": "Fe", "position": [0, 0, 0], "charge": 2.0, "magmom": [0, 0, 2.2]},
695+
{"symbol": "O", "position": [1.5, 1.5, 1.5], "charge": -1.0}
696+
]
697+
)
698+
structure.store()
699+
700+
# Load the NPZ file and check key order
701+
npz_data = structure._load_properties_from_npz()
702+
703+
# Keys should be present (exact keys depend on what gets stored in repository)
704+
assert len(npz_data) > 0, "NPZ should contain data"
705+
706+
# Get the keys as a list
707+
keys = list(npz_data.keys())
708+
709+
# Keys should be in sorted order
710+
assert keys == sorted(keys), f"NPZ keys should be sorted, but got: {keys}"
711+
712+
# Create another identical structure - should have same key order
713+
structure2 = StructureData(
714+
cell=[[3.0, 0, 0], [0, 3.0, 0], [0, 0, 3.0]],
715+
pbc=[True, True, True],
716+
sites=[
717+
{"symbol": "Fe", "position": [0, 0, 0], "charge": 2.0, "magmom": [0, 0, 2.2]},
718+
{"symbol": "O", "position": [1.5, 1.5, 1.5], "charge": -1.0}
719+
]
720+
)
721+
structure2.store()
722+
723+
npz_data2 = structure2._load_properties_from_npz()
724+
keys2 = list(npz_data2.keys())
725+
726+
# Key order should be identical
727+
assert keys == keys2, "Identical structures should have same NPZ key order"
728+
729+
# Repository hashes should match (deterministic binary output)
730+
hash1 = structure.base.repository.hash()
731+
hash2 = structure2.base.repository.hash()
732+
assert hash1 == hash2, "Identical structures should have identical repository hashes"
733+
734+
685735
def test_properties_getter_unstored():
686736
"""Test properties getter for unstored node."""
687737
structure = StructureData(

0 commit comments

Comments
 (0)