Ensure length of flattened_data is same as end of cumulative_length (#211)

iguinn · web-flow · commit 58ee2c51c7fd · 2026-01-30T10:17:33.000+01:00
* Ensure length of flattened_data is same as end of cumulative_length
diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py
@@ -243,6 +243,11 @@ def __init__(
                 )
             elif self.flattened_data is None:
                 self.flattened_data = flattened_data
+                expected_fd_size = self.cumulative_length[-1] if len(self) > 0 else 0
+                if len(flattened_data) != expected_fd_size:
+                    msg = "provided flattened_data array has different size than indicated by cumulative_length. Resizing..."
+                    log.warning(msg)
+                    self.flattened_data.resize(expected_fd_size)
 
         super().__init__(attrs)
 
@@ -474,8 +479,8 @@ def _set_vector_unsafe(
         `vec` in ``self.flattened_data[j:sum(lens)]``. Finally updates
         ``self.cumulative_length[i]`` with the new flattened data array length.
 
-        Vectors stored after index `i` can be overridden, producing unintended
-        behavior. This method is typically used for fast sequential fill of a
+        Vectors stored after index `i` are removed and the VectorOfVectors is
+        resized. This method is typically used for fast sequential fill of a
         pre-allocated vector of vectors.
 
         If `vec` is a 1D array and `lens` is ``None``, set using full array. If
@@ -484,8 +489,9 @@ def _set_vector_unsafe(
 
         Danger
         ------
-        This method can lead to undefined behavior or vector invalidation if
-        used improperly. Use it only if you know what you are doing.
+        This method resizes the array, removes subsequent vectors, and can lead
+        to undefined behavior or vector-view invalidation if used improperly.
+        Use it only if you know what you are doing.
 
         See Also
         --------
@@ -518,6 +524,9 @@ def _set_vector_unsafe(
                 nan_val = np.nan
             from .vovutils import _nb_fill  # noqa: PLC0415
 
+            self.flattened_data.resize(cum_lens[-1])
+            self.cumulative_length.resize(i + len(lens))
+
             _nb_fill(
                 vec,
                 lens,
diff --git a/tests/types/test_vectorofvectors.py b/tests/types/test_vectorofvectors.py
@@ -460,23 +460,20 @@ def test_set_vector_unsafe(testvov):
             == np.nan_to_num(exp_entry_w_overflow, nan=0)
         )
 
-        # test vectorized filling when len is longer than array
-        fourth_vov = lgdo.VectorOfVectors(
+        # test vectorized filling with fewer elements than are in initial array
+        fifth_vov = lgdo.VectorOfVectors(
             shape_guess=(5, 5), dtype=current_testvov.dtype
         )
-        desired_lens[3] = 10
-        fourth_vov._set_vector_unsafe(0, desired_aoa, desired_lens)
-        if current_testvov.dtype in ["int32", "int64", "uint16", "uint32"]:
-            exp_entry_w_overflow = np.concatenate(
-                [desired[3], np.array([np.iinfo(current_testvov.dtype).min] * 6)]
-            )
-        else:
-            exp_entry_w_overflow = np.concatenate([desired[3], np.array([np.nan] * 6)])
-
-        assert np.all(
-            np.nan_to_num(fourth_vov[3], nan=0)
-            == np.nan_to_num(exp_entry_w_overflow, nan=0)
+        current_testvov = VectorOfVectors(
+            flattened_data=current_testvov.flattened_data.nda[
+                : current_testvov.cumulative_length[2]
+            ],
+            cumulative_length=current_testvov.cumulative_length[:3],
         )
+        fifth_vov._set_vector_unsafe(0, desired_aoa[:3, ...], desired_lens[:3])
+        assert len(fifth_vov) == 3
+        assert len(fifth_vov.flattened_data) == len(current_testvov.flattened_data)
+        assert current_testvov == fifth_vov
 
 
 def test_iter(testvov):