|
9 | 9 | import numpy as np |
10 | 10 | import pytest |
11 | 11 |
|
| 12 | +from iris.exceptions import TranslationError |
12 | 13 | from iris.fileformats.netcdf._bytecoding_datasets import ( |
13 | 14 | DECODE_TO_STRINGS_ON_READ, |
14 | 15 | EncodedDataset, |
@@ -224,17 +225,28 @@ def test_overlength(self, tempdir): |
224 | 225 | strlen = 5 |
225 | 226 | ds = make_encoded_dataset(path, strlen=strlen, encoding="ascii") |
226 | 227 | v = ds.variables["vxs"] |
227 | | - v[:] = ["1", "123456789", "two"] |
228 | | - expected_bytes = make_bytearray(["1", "12345", "two"], strlen) |
229 | | - check_raw_content(path, "vxs", expected_bytes) |
| 228 | + msg = r"String .* written to netcdf exceeds string dimension .* : [0-9]* > 5\." |
| 229 | + with pytest.raises(TranslationError, match=msg): |
| 230 | + v[:] = ["1", "123456789", "two"] |
230 | 231 |
|
231 | 232 | def test_overlength_splitcoding(self, tempdir): |
232 | 233 | # Check expected behaviour when non-ascii multibyte coding gets truncated |
233 | 234 | path = tempdir / "test_writestrings_overlength_splitcoding.nc" |
234 | 235 | strlen = 5 |
235 | 236 | ds = make_encoded_dataset(path, strlen=strlen, encoding="utf-8") |
236 | 237 | v = ds.variables["vxs"] |
237 | | - v[:] = ["1", "1234ü", "two"] |
| 238 | + # Note: we must do the assignment as a single byte array, to avoid hitting the
| 239 | + # safety check for this exact problem: see the previous test.
| 240 | + byte_arrays = [ |
| 241 | + string.encode("utf-8")[:strlen] for string in ("1", "1234ü", "two") |
| 242 | + ] |
| 243 | + nd_bytes_array = np.array( |
| 244 | + [ |
| 245 | + [bytes[i : i + 1] if i < len(bytes) else b"\0" for i in range(strlen)] |
| 246 | + for bytes in byte_arrays |
| 247 | + ] |
| 248 | + ) |
| 249 | + v[:] = nd_bytes_array |
238 | 250 | # This creates a problem: it won't read back |
239 | 251 | msg = ( |
240 | 252 | "Character data in variable 'vxs' could not be decoded " |
|
0 commit comments