|
9 | 9 | import numpy as np |
10 | 10 | import pytest |
11 | 11 |
|
| 12 | +from iris.exceptions import TranslationError |
12 | 13 | from iris.fileformats.netcdf._bytecoding_datasets import ( |
13 | 14 | DECODE_TO_STRINGS_ON_READ, |
14 | 15 | EncodedDataset, |
@@ -224,17 +225,28 @@ def test_overlength(self, tempdir): |
224 | 225 | strlen = 5 |
225 | 226 | ds = make_encoded_dataset(path, strlen=strlen, encoding="ascii") |
226 | 227 | v = ds.variables["vxs"] |
227 | | - v[:] = ["1", "123456789", "two"] |
228 | | - expected_bytes = make_bytearray(["1", "12345", "two"], strlen) |
229 | | - check_raw_content(path, "vxs", expected_bytes) |
| 228 | + msg = r"String .* written to netcdf exceeds string dimension .* : [0-9]* > 5\." |
| 229 | + with pytest.raises(TranslationError, match=msg): |
| 230 | + v[:] = ["1", "123456789", "two"] |
230 | 231 |
|
231 | 232 | def test_overlength_splitcoding(self, tempdir): |
232 | 233 | # Check expected behaviour when non-ascii multibyte coding gets truncated |
233 | 234 | path = tempdir / "test_writestrings_overlength_splitcoding.nc" |
234 | 235 | strlen = 5 |
235 | 236 | ds = make_encoded_dataset(path, strlen=strlen, encoding="utf-8") |
236 | 237 | v = ds.variables["vxs"] |
237 | | - v[:] = ["1", "1234ü", "two"] |
| 238 | + # Note: we must do the assignment as a single byte array, to avoid hitting the
| 239 | + # safety check for this exact problem: see the previous test.
| 240 | + byte_arrays = [ |
| 241 | + string.encode("utf-8")[:strlen] for string in ("1", "1234ü", "two") |
| 242 | + ] |
| 243 | + nd_bytes_array = np.array( |
| 244 | + [ |
| 245 | + [bytes[i : i + 1] if i < len(bytes) else b"\0" for i in range(strlen)] |
| 246 | + for bytes in byte_arrays |
| 247 | + ] |
| 248 | + ) |
| 249 | + v[:] = nd_bytes_array |
238 | 250 | # This creates a problem: it won't read back |
239 | 251 | msg = ( |
240 | 252 | "Character data in variable 'vxs' could not be decoded " |
|
0 commit comments