data-variables, auxiliary coordinates, ancillary variables and - possibly - cell measures.
"""
1010
11- from contextlib import contextmanager
1211from dataclasses import dataclass
1312from pathlib import Path
13+ from typing import Iterable
1414
1515import numpy as np
16+ from numpy .typing import ArrayLike
1617import pytest
1718
1819import iris
20+ from iris .coords import AuxCoord , DimCoord
21+ from iris .cube import Cube
1922from iris .fileformats .netcdf import _thread_safe_nc
2023
2124
@@ -49,8 +52,8 @@ def all_lazy_auxcoords():
4952# Independently defined here, to avoid relying on any code we are testing.
5053#
5154def convert_strings_to_chararray (
52- string_array_1d : np . ndarray , maxlen : int , encoding : str | None = None
53- ):
55+ string_array_1d : ArrayLike , maxlen : int , encoding : str | None = None
56+ ) -> np . ndarray :
5457 # Note: this is limited to 1-D arrays of strings.
5558 # Could generalise that if needed, but for now this makes it simpler.
5659 if encoding is None :
@@ -63,12 +66,13 @@ def convert_strings_to_chararray(
6366
6467
6568def convert_bytearray_to_strings (
66- byte_array , encoding = "utf-8" , string_length : int | None = None
67- ):
69+ byte_array : ArrayLike , encoding : str = "utf-8" , string_length : int | None = None
70+ ) -> np . ndarray :
6871 """Convert bytes to strings.
6972
7073 N.B. for now at least, we assume the string dim is **always the last one**.
7174 """
75+ byte_array = np .asanyarray (byte_array )
7276 bytes_shape = byte_array .shape
7377 var_shape = bytes_shape [:- 1 ]
7478 if string_length is None :
@@ -88,9 +92,9 @@ class SamplefileDetails:
8892 """Convenience container for information about a sample file."""
8993
9094 filepath : Path
91- datavar_data : np . ndarray
92- stringcoord_data : np . ndarray
93- numericcoord_data : np . ndarray
95+ datavar_data : ArrayLike
96+ stringcoord_data : ArrayLike
97+ numericcoord_data : ArrayLike
9498
9599
96100def make_testfile (
@@ -200,7 +204,7 @@ def testdata(
200204 encoding ,
201205 tmp_path ,
202206 use_separate_dims ,
203- ):
207+ ) -> Iterable [ SamplefileDetails ] :
204208 """Create a suitable valid testfile, and return expected string content."""
205209 if PERSIST_TESTFILES :
206210 tmp_path = Path (PERSIST_TESTFILES ).expanduser ()
@@ -218,7 +222,7 @@ def testdata(
218222 from iris .tests .integration .netcdf .test_chararrays import ncdump
219223
220224 # TODO: temporary for debug -- TO REMOVE
221- ncdump (tempfile_path )
225+ ncdump (str ( tempfile_path ) )
222226 yield testdata
223227
224228 def test_valid_encodings (self , encoding , testdata : SamplefileDetails ):
@@ -246,3 +250,144 @@ def test_valid_encodings(self, encoding, testdata: SamplefileDetails):
246250 coord_var_2 = cube .coord ("v_numeric" )
247251 assert coord_var_2 .dtype == np .float64
248252 assert np .all (coord_var_2 .points == numeric_data )
253+
254+
@pytest.fixture(params=["stringdata", "bytedata"])
def as_bytes(request):
    """Parametrised fixture: True when the test data should be byte (char) arrays."""
    use_bytes = request.param == "bytedata"
    yield use_bytes
258+
259+
@dataclass
class SampleCubeDetails:
    """Convenience container bundling a test cube with its expected content."""

    # The constructed test cube (string data variable + string aux coord).
    cube: Cube
    # Expected data-variable content (string or byte array).
    datavar_data: np.ndarray
    # Expected string-coordinate content (string or byte array).
    stringcoord_data: np.ndarray
    # Where the cube was saved; filled in by the fixture after saving.
    save_path: str | Path | None = None
266+
267+
def make_testcube(
    encoding_str: str | None = None,
    byte_data: bool = False,
) -> SampleCubeDetails:
    """Build a small test cube with a string data variable and a string aux coord.

    The content is either plain unicode strings or pre-encoded byte (char)
    arrays, depending on 'byte_data'.  Non-ascii sample text is used unless the
    encoding is ascii (or there is no encoding).

    Returns a SampleCubeDetails holding the cube plus its expected arrays.
    """
    ascii_only = encoding_str in (NO_ENCODING_STR, "ascii")

    # Sample text: include non-ascii characters only when the encoding allows.
    if ascii_only:
        coord_strings = ["mOnster", "London", "Amsterdam"]
        data_strings = ["bun", "Eclair", "sandwich"]
    else:
        coord_strings = ["Münster", "London", "Amsterdam"]
        data_strings = ["bun", "éclair", "sandwich"]

    if byte_data:
        # Pre-encode to flat char arrays; fall back to ascii when no encoding.
        encode_with = "ascii" if encoding_str == NO_ENCODING_STR else encoding_str
        coord_array = convert_strings_to_chararray(
            coord_strings, maxlen=N_CHARS_DIM, encoding=encode_with
        )
        data_array = convert_strings_to_chararray(
            data_strings, maxlen=N_CHARS_DIM, encoding=encode_with
        )
    else:
        # Plain unicode arrays: utf-32 uses 4 bytes per char, so shorten the
        # string length (the extra -1 presumably allows for a BOM — TODO confirm).
        n_chars = N_CHARS_DIM
        if encoding_str == "utf-32":
            n_chars = n_chars // 4 - 1
        unicode_dtype = np.dtype(f"U{n_chars}")
        coord_array = np.array(coord_strings, dtype=unicode_dtype)
        data_array = np.array(data_strings, dtype=unicode_dtype)

    cube = Cube(data_array, var_name="v")
    cube.add_dim_coord(DimCoord(np.arange(N_XDIM), var_name="x"), 0)
    if encoding_str != NO_ENCODING_STR:
        cube.attributes["_Encoding"] = encoding_str
    aux_co = AuxCoord(coord_array, var_name="v_co")
    if encoding_str != NO_ENCODING_STR:
        aux_co.attributes["_Encoding"] = encoding_str
    # Byte data carries an extra (char) dimension on the coord.
    coord_dims = (0, 1) if byte_data else (0,)
    cube.add_aux_coord(aux_co, coord_dims)

    return SampleCubeDetails(
        cube=cube,
        datavar_data=data_array,
        stringcoord_data=coord_array,
    )
316+
317+
class TestWriteEncodings:
    """Test saving of testfiles with encoded string data.

    To avoid circularity, we generate and save *cube* data.
    """

    @pytest.fixture(params=["dataAsStrings", "dataAsBytes"])
    def write_bytes(self, request):
        """Parametrised fixture: True when the cube content is byte (char) data."""
        yield request.param == "dataAsBytes"

    @pytest.fixture()
    def testpath(self, encoding, write_bytes, tmp_path):
        """Return a target filepath for the saved testfile.

        The filename records both the encoding and the string/byte data mode.
        """
        if PERSIST_TESTFILES:
            # Save to a persistent location instead of the pytest tmp dir.
            tmp_path = Path(PERSIST_TESTFILES).expanduser()
        if encoding == "<noencoding>":
            filetag = "noencoding"
        else:
            filetag = encoding
        datatag = "writebytes" if write_bytes else "writestrings"
        tempfile_path = tmp_path / f"sample_write_{filetag}_{datatag}.nc"
        yield tempfile_path

    @pytest.fixture()
    def testdata(self, testpath, encoding, write_bytes):
        """Create a suitable test cube + save to a file.

        Apply the given encoding to both coord and cube data.
        Form the data as bytes, or as strings, depending on 'write_bytes'.
        """
        cube_info = make_testcube(encoding_str=encoding, byte_data=write_bytes)
        cube_info.save_path = testpath
        iris.save(cube_info.cube, testpath)
        yield cube_info

    def test_valid_encodings(self, encoding, testdata, write_bytes):
        """Check that the saved file contains the expected chars + attributes."""
        cube_info = testdata
        cube, path = cube_info.cube, cube_info.save_path
        # TODO: not testing the "byte read/write" yet
        # Make a quick check for cube equality : but the presentation depends on the read mode
        # with DECODE_TO_STRINGS_ON_READ.context(not write_bytes):
        #     read_cube = iris.load_cube(path)
        #     assert read_cube == cube

        # N.B. file content should not depend on whether bytes or strings were written
        vararray, coordarray = cube_info.datavar_data, cube_info.stringcoord_data
        ds = _thread_safe_nc.DatasetWrapper(path)
        try:
            # Fetch raw character data, i.e. disable automatic string conversion.
            ds.set_auto_chartostring(False)
            v_main = ds.variables["v"]
            v_co = ds.variables["v_co"]
            assert v_main.shape == (N_XDIM, N_CHARS_DIM)
            assert v_co.shape == (N_XDIM, N_CHARS_DIM)
            assert v_main.dtype == "<S1"
            assert v_co.dtype == "<S1"
            if encoding == NO_ENCODING_STR:
                # No "_Encoding" attribute should have been written.
                assert "_Encoding" not in v_main.ncattrs()
                assert "_Encoding" not in v_co.ncattrs()
            else:
                assert v_main.getncattr("_Encoding") == encoding
                assert v_co.getncattr("_Encoding") == encoding
            data_main = v_main[:]
            data_co = v_co[:]
        finally:
            # Always release the dataset, even when an assertion fails.
            ds.close()
        if not write_bytes:
            # convert to strings, to compare with originals
            # ("ELSE": vararray/coordarray are bytes anyway)
            if encoding == NO_ENCODING_STR:
                encoding = "ascii"
            data_main = convert_bytearray_to_strings(
                data_main, encoding, string_length=N_CHARS_DIM
            )
            data_co = convert_bytearray_to_strings(
                data_co, encoding, string_length=N_CHARS_DIM
            )
        assert np.all(data_main == vararray)
        assert np.all(data_co == coordarray)
0 commit comments