Skip to content

Commit 04b0cab

Browse files
Added multifile dataset caching test.
1 parent 3b85b89 commit 04b0cab

File tree

1 file changed

+26
-14
lines changed

1 file changed

+26
-14
lines changed

tests/operations/test_cache.py

Lines changed: 26 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import pathlib
33

44
import pytest
5+
import xarray
56

67
import emsarray
78
import emsarray.operations.cache
@@ -12,6 +13,7 @@
1213
int_hash = '7b08e025e311c3dfcf5179b67c0fdc08e73de261'
1314
attr_hash_lat = "2cb433979fc2d9c3884eea8569dd6a44406950f3"
1415
cache_key_hash_cf1d_sha1 = "2b006999273225ed70d4810357b6a06e6bebe9a6"
16+
cache_key_hash_multifile_cf2d_sha1 = "ea2d2e6131f1e499f622e83ed4fc2415649def06"
1517

1618
# Blake2b
1719
cache_key_hash_cf1d = "1a3226072f08441ee79f727b0775709209ff2965299539c898ecc401cf17e23f"
@@ -202,23 +204,33 @@ def test_cache_key_cfgrid1d_sha1(datasets: pathlib.Path):
202204
assert result_cache_key_cf == cache_key_hash_cf1d_sha1
203205

204206

205-
def test_cache_key_with_missing_data_array_encoding_type(datasets: pathlib.Path):
206-
dataset_ugrid = emsarray.open_dataset(datasets / 'ugrid_mesh2d.nc')
207+
def test_cache_key_with_multifile_dataset(datasets: pathlib.Path):
208+
209+
ugrid_path1 = datasets / 'multiple_dataset/modified_cf1.nc'
210+
ugrid_path2 = datasets / 'multiple_dataset/modified_cf2.nc'
211+
dataset_paths = [ugrid_path1, ugrid_path2]
212+
213+
ugrid_file1 = emsarray.open_dataset(ugrid_path1)
214+
ugrid_file2 = emsarray.open_dataset(ugrid_path2)
215+
216+
multifile_dataset = xarray.open_mfdataset(dataset_paths, data_vars=['values'])
217+
218+
multifile_ds_hash = hashlib.sha1()
219+
220+
multifile_dataset.ems.hash_geometry(multifile_ds_hash)
221+
222+
multifile_ds_digest = multifile_ds_hash.hexdigest()
223+
224+
ugrid_file1_dtype = ugrid_file1.ems.topology.latitude.encoding.get('dtype', None).name
207225

208-
data_array = dataset_ugrid.ems.topology.face_node_connectivity
209-
data_array_dtype_dropped = data_array.copy()
210-
data_array_dtype_dropped.encoding.pop('dtype', None)
226+
ugrid_file2_dtype = ugrid_file2.ems.topology.latitude.encoding.get('dtype', None).name
211227

212-
with_dtype_hash = hashlib.sha1()
213-
without_dtype_hash = hashlib.sha1()
228+
multifile_encoding = multifile_dataset.ems.topology.latitude.encoding.get('dtype', None)
214229

215-
dataset_ugrid.ems.hash_geometry(with_dtype_hash)
216-
dataset_ugrid['mesh_face_node'] = data_array_dtype_dropped
217-
dataset_ugrid.ems.hash_geometry(without_dtype_hash)
230+
assert multifile_encoding is None
218231

219-
with_dtype_digest = with_dtype_hash.hexdigest()
220-
without_dtype_digest = without_dtype_hash.hexdigest()
232+
assert ugrid_file1_dtype is not None
221233

222-
assert with_dtype_digest != without_dtype_digest
234+
assert ugrid_file2_dtype is not None
223235

224-
assert data_array_dtype_dropped.equals(data_array)
236+
assert multifile_ds_digest == cache_key_hash_multifile_cf2d_sha1

0 commit comments

Comments
 (0)