Skip to content

Commit a7d7826

Browse files
authored
[ENH] Implement ZIP-based model serialization format (#1035)
# Description

Refactored the model serialization system to use ZIP archives instead of raw binary files. This change:

- Implemented `model_to_bytes()` to replace `model_to_binary()`, creating a ZIP archive with header.json, input.bin, and grid.bin
- Added `_load_model_from_bytes()` to replace `_deserialize_binary_file()`, extracting data from the ZIP archive
- Updated references in save_model(), load_model(), and test verification helpers
- Added imports for io and zipfile modules

Relates to #serialization-improvement

# Checklist

- [ ] My code uses type hinting for function and method arguments and return values.
- [ ] I have created tests which cover my code.
- [ ] The test code either 1. demonstrates at least one valuable use case (e.g. integration tests) or 2. verifies that outputs are as expected for given inputs (e.g. unit tests).
- [ ] New tests pass locally with my changes.
2 parents 08e510d + 929c33b commit a7d7826

File tree

10 files changed

+74
-48
lines changed

10 files changed

+74
-48
lines changed

gempy/core/data/grid.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,13 +95,11 @@ def grid_binary(self):
9595
return custom_grid_bytes + topography_bytes
9696

9797

98-
_grid_binary_size: int = 0
9998
@computed_field
10099
def binary_meta_data(self) -> dict:
101100
return {
102101
'custom_grid_binary_length': len(self._custom_grid.values.astype("float64").tobytes()) if self._custom_grid else 0,
103102
'topography_binary_length': len(self._topography.values.astype("float64").tobytes()) if self._topography else 0,
104-
'grid_binary_size': self._grid_binary_size
105103
}
106104

107105
@computed_field(alias="active_grids")

gempy/core/data/structural_frame.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -492,13 +492,11 @@ def deserialize_binary(cls, data: Union["StructuralFrame", dict], constructor: M
492492

493493
# Access the context variable to get injected data
494494

495-
_input_binary_size: int = 0
496495
@computed_field
497496
def binary_meta_data(self) -> dict:
498497
return {
499498
'sp_binary_length': len(self.surface_points_copy.data.tobytes()),
500499
'ori_binary_length': len(self.orientations_copy.data.tobytes()) ,
501-
'input_binary_size': self._input_binary_size
502500
}
503501

504502
# endregion

gempy/modules/serialization/save_load.py

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from ...optional_dependencies import require_zlib
88
import pathlib
99
import os
10+
import io
11+
import zipfile
1012

1113

1214
def save_model(model: GeoModel, path: str | None = None, validate_serialization: bool = True):
@@ -44,10 +46,10 @@ def save_model(model: GeoModel, path: str | None = None, validate_serialization:
4446
# If no extension, add the valid extension
4547
path = str(path_obj) + VALID_EXTENSION
4648

47-
binary_file = model_to_binary(model)
49+
binary_file = model_to_bytes(model)
4850

4951
if validate_serialization:
50-
model_deserialized = _deserialize_binary_file(binary_file)
52+
model_deserialized = _load_model_from_bytes(binary_file)
5153
_validate_serialization(model, model_deserialized)
5254

5355
# Create directory if it doesn't exist
@@ -129,8 +131,62 @@ def load_model(path: str) -> GeoModel:
129131
with open(path, 'rb') as f:
130132
binary_file = f.read()
131133

132-
return _deserialize_binary_file(binary_file)
134+
return _load_model_from_bytes(binary_file)
133135

136+
def model_to_bytes(model: GeoModel) -> bytes:
    """Serialize ``model`` into an in-memory ZIP archive.

    The archive contains three members, always written in this order:

    * ``header.json`` -- ``model.model_dump_json(by_alias=True, indent=4)``
    * ``input.bin``   -- ``model.structural_frame.input_tables_binary``
    * ``grid.bin``    -- ``model.grid.grid_binary``

    Each member is written with a fixed 1980-01-01 timestamp and zeroed
    external attributes so that this metadata does not vary between runs,
    and each member is DEFLATE-compressed at level 6.

    Args:
        model: The model to serialize.

    Returns:
        The raw bytes of the ZIP archive.
    """
    compression = zipfile.ZIP_DEFLATED
    compresslevel = 6

    # 1) JSON header
    header_json = model.model_dump_json(by_alias=True, indent=4)

    # 2) Raw binary chunks (no additional zlib.compress here; the ZIP
    #    container does the compression)
    input_raw = model.structural_frame.input_tables_binary
    grid_raw = model.grid.grid_binary

    def make_info(name: str) -> zipfile.ZipInfo:
        # Force a fixed timestamp (1980-01-01) so the file headers don't vary
        zi = zipfile.ZipInfo(name, date_time=(1980, 1, 1, 0, 0, 0))
        zi.external_attr = 0  # clear OS-specific file permissions
        return zi

    # 3) Pack into a ZIP archive in a fixed order
    members = (
        ("header.json", header_json),
        ("input.bin", input_raw),
        ("grid.bin", grid_raw),
    )

    buf = io.BytesIO()
    with zipfile.ZipFile(
            buf, mode="w",
            compression=compression,
            compresslevel=compresslevel
    ) as zf:
        for name, payload in members:
            # When writestr() receives a ZipInfo it uses the ZipInfo's own
            # compress_type (ZIP_STORED by default) and ignores the
            # archive-level settings above, so the compression has to be
            # requested explicitly per member.
            zf.writestr(
                make_info(name),
                payload,
                compress_type=compression,
                compresslevel=compresslevel,
            )

    return buf.getvalue()
170+
171+
def _load_model_from_bytes(data: bytes) -> GeoModel:
    """Rebuild a ``GeoModel`` from the bytes of a ZIP archive.

    The archive is expected to contain the members ``header.json``,
    ``input.bin`` and ``grid.bin``. The header is decoded as UTF-8 and
    validated with ``GeoModel.model_validate_json`` while the two binary
    payloads are supplied through the ``loading_model_from_binary`` context
    manager.

    Args:
        data: Raw bytes of the ZIP archive.

    Returns:
        The model validated from the archive's JSON header.

    Raises:
        zipfile.BadZipFile: If ``data`` is not a valid ZIP archive.
        KeyError: If one of the three expected members is missing.
    """
    from ...core.data.encoders.converters import loading_model_from_binary

    with zipfile.ZipFile(io.BytesIO(data), "r") as zf:
        header_json = zf.read("header.json").decode("utf-8")
        input_raw = zf.read("input.bin")
        grid_raw = zf.read("grid.bin")

    # NOTE(review): presumably the context manager exposes the binary payloads
    # to the model validators for the duration of the block -- confirm against
    # loading_model_from_binary.
    with loading_model_from_binary(
            input_binary=input_raw,
            grid_binary=grid_raw
    ):
        model = GeoModel.model_validate_json(header_json)

    return model
134190

135191
def _deserialize_binary_file(binary_file):
136192
import json

test/test_modules/_geophysics_TO_UPDATE/test_gravity.test_gravity.verify/2-layers.approved.txt

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,9 @@
5555
],
5656
"is_dirty": true,
5757
"basement_color": "#ffbe00",
58-
"_input_binary_size": 79,
5958
"binary_meta_data": {
6059
"sp_binary_length": 144,
61-
"ori_binary_length": 60,
62-
"input_binary_size": 79
60+
"ori_binary_length": 60
6361
}
6462
},
6563
"grid": {
@@ -104,11 +102,9 @@
104102
},
105103
"_transform": null,
106104
"_octree_levels": -1,
107-
"_grid_binary_size": 8,
108105
"binary_meta_data": {
109106
"custom_grid_binary_length": 0,
110-
"topography_binary_length": 0,
111-
"grid_binary_size": 8
107+
"topography_binary_length": 0
112108
},
113109
"active_grids": 1058
114110
},

test/test_modules/test_faults/test_finite_faults.test_finite_fault_scalar_field_on_fault.verify/fault.approved.txt

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -129,11 +129,9 @@
129129
],
130130
"is_dirty": true,
131131
"basement_color": "#728f02",
132-
"_input_binary_size": 211,
133132
"binary_meta_data": {
134133
"sp_binary_length": 792,
135-
"ori_binary_length": 300,
136-
"input_binary_size": 211
134+
"ori_binary_length": 300
137135
}
138136
},
139137
"grid": {
@@ -160,11 +158,9 @@
160158
"_centered_grid": null,
161159
"_transform": null,
162160
"_octree_levels": -1,
163-
"_grid_binary_size": 8,
164161
"binary_meta_data": {
165162
"custom_grid_binary_length": 0,
166-
"topography_binary_length": 0,
167-
"grid_binary_size": 8
163+
"topography_binary_length": 0
168164
},
169165
"active_grids": 1025
170166
},

test/test_modules/test_grids/test_custom_grid.test_custom_grid.verify/fold.approved.txt

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,9 @@
6161
],
6262
"is_dirty": true,
6363
"basement_color": "#ffbe00",
64-
"_input_binary_size": 233,
6564
"binary_meta_data": {
6665
"sp_binary_length": 1296,
67-
"ori_binary_length": 120,
68-
"input_binary_size": 233
66+
"ori_binary_length": 120
6967
}
7068
},
7169
"grid": {
@@ -92,11 +90,9 @@
9290
"_centered_grid": null,
9391
"_transform": null,
9492
"_octree_levels": -1,
95-
"_grid_binary_size": 29,
9693
"binary_meta_data": {
9794
"custom_grid_binary_length": 192,
98-
"topography_binary_length": 0,
99-
"grid_binary_size": 29
95+
"topography_binary_length": 0
10096
},
10197
"active_grids": 1029
10298
},

test/test_modules/test_grids/test_grids_sections.test_section_grids.verify/fold.approved.txt

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,9 @@
6161
],
6262
"is_dirty": true,
6363
"basement_color": "#ffbe00",
64-
"_input_binary_size": 233,
6564
"binary_meta_data": {
6665
"sp_binary_length": 1296,
67-
"ori_binary_length": 120,
68-
"input_binary_size": 233
66+
"ori_binary_length": 120
6967
}
7068
},
7169
"grid": {
@@ -154,11 +152,9 @@
154152
"_centered_grid": null,
155153
"_transform": null,
156154
"_octree_levels": -1,
157-
"_grid_binary_size": 37758,
158155
"binary_meta_data": {
159156
"custom_grid_binary_length": 0,
160-
"topography_binary_length": 86400,
161-
"grid_binary_size": 37758
157+
"topography_binary_length": 86400
162158
},
163159
"active_grids": 1049
164160
},

test/test_modules/test_grids/test_grids_sections.test_topography_II.verify/Model1.approved.txt

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,9 @@
9696
],
9797
"is_dirty": true,
9898
"basement_color": "#443988",
99-
"_input_binary_size": 165,
10099
"binary_meta_data": {
101100
"sp_binary_length": 360,
102-
"ori_binary_length": 120,
103-
"input_binary_size": 165
101+
"ori_binary_length": 120
104102
}
105103
},
106104
"grid": {
@@ -154,11 +152,9 @@
154152
"_centered_grid": null,
155153
"_transform": null,
156154
"_octree_levels": -1,
157-
"_grid_binary_size": 20161,
158155
"binary_meta_data": {
159156
"custom_grid_binary_length": 0,
160-
"topography_binary_length": 48000,
161-
"grid_binary_size": 20161
157+
"topography_binary_length": 48000
162158
},
163159
"active_grids": 1034
164160
},

test/test_modules/test_serialize_model.test_generate_horizontal_stratigraphic_model.verify/Horizontal Stratigraphic Model serialization.approved.txt

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,9 @@
6161
],
6262
"is_dirty": true,
6363
"basement_color": "#ffbe00",
64-
"_input_binary_size": 0,
6564
"binary_meta_data": {
6665
"sp_binary_length": 432,
67-
"ori_binary_length": 120,
68-
"input_binary_size": 0
66+
"ori_binary_length": 120
6967
}
7068
},
7169
"grid": {
@@ -92,11 +90,9 @@
9290
"_centered_grid": null,
9391
"_transform": null,
9492
"_octree_levels": -1,
95-
"_grid_binary_size": 0,
9693
"binary_meta_data": {
9794
"custom_grid_binary_length": 0,
98-
"topography_binary_length": 0,
99-
"grid_binary_size": 0
95+
"topography_binary_length": 0
10096
},
10197
"active_grids": 1026
10298
},

test/verify_helper.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,7 @@
1111
from approvaltests.reporters import GenericDiffReporter, GenericDiffReporterConfig
1212

1313
from gempy.core.data import GeoModel
14-
from gempy.modules.serialization.save_load import _to_binary, _deserialize_binary_file, model_to_binary
15-
from gempy.optional_dependencies import require_zlib
16-
14+
from gempy.modules.serialization.save_load import _load_model_from_bytes, model_to_bytes
1715

1816
class WSLWindowsDiffReporter(GenericDiffReporter):
1917
def get_command(self, received, approved):
@@ -120,7 +118,7 @@ def verify_model_serialization(model: GeoModel, verify_moment: Literal["before",
120118
Raises:
121119
ValueError: If `verify_moment` is not set to "before" or "after".
122120
"""
123-
binary_file = model_to_binary(model)
121+
binary_file = model_to_bytes(model)
124122

125123
original_model = model
126124
original_model.meta.creation_date = "<DATE_IGNORED>"
@@ -131,7 +129,7 @@ def verify_model_serialization(model: GeoModel, verify_moment: Literal["before",
131129
name=file_name
132130
)
133131
elif verify_moment == "after":
134-
model_deserialized = _deserialize_binary_file(binary_file)
132+
model_deserialized = _load_model_from_bytes(binary_file)
135133
model_deserialized.meta.creation_date = "<DATE_IGNORED>"
136134
verify_json(
137135
item=model_deserialized.model_dump_json(by_alias=True, indent=4),

0 commit comments

Comments
 (0)