Docstrings, better error checking, better testing

scotts · scotts · commit 7e4331331589 · 2025-11-21T12:25:18.000-08:00
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
@@ -239,6 +239,19 @@ int checkedToPositiveInt(const std::string& str) {
   return ret;
 }
 
+int checkedToNonNegativeInt(const std::string& str) {  // Parse str as an int >= 0; any failure is reported through TORCH_CHECK.
+  int ret = 0;
+  try {
+    ret = std::stoi(str);  // NOTE(review): stoi stops at the first non-numeric character, so "12abc" yields 12 - confirm this is acceptable.
+  } catch (const std::invalid_argument&) {  // str has no leading numeric text
+    TORCH_CHECK(false, "String cannot be converted to an int:" + str);
+  } catch (const std::out_of_range&) {  // parsed value does not fit in an int
+    TORCH_CHECK(false, "String would become integer out of range:" + str);
+  }
+  TORCH_CHECK(ret >= 0, "String must be a non-negative integer:" + str);  // 0 passes; this helper parses the crop x/y offsets
+  return ret;
+}
+
 // Resize transform specs take the form:
 //
 //   "resize, <height>, <width>"
@@ -270,8 +283,8 @@ Transform* makeCropTransform(
       "cropTransformSpec must have 5 elements including its name");
   int height = checkedToPositiveInt(cropTransformSpec[1]);
   int width = checkedToPositiveInt(cropTransformSpec[2]);
-  int x = checkedToPositiveInt(cropTransformSpec[3]);
-  int y = checkedToPositiveInt(cropTransformSpec[4]);
+  int x = checkedToNonNegativeInt(cropTransformSpec[3]);
+  int y = checkedToNonNegativeInt(cropTransformSpec[4]);
   return new CropTransform(FrameDims(height, width), x, y);
 }
 
diff --git a/src/torchcodec/transforms/_decoder_transforms.py b/src/torchcodec/transforms/_decoder_transforms.py
@@ -23,8 +23,8 @@ class DecoderTransform(ABC):
     decoded frames and applying the same kind of transform.
 
     Most ``DecoderTransform`` objects have a complementary transform in TorchVision,
-    specificially in `torchvision.transforms.v2 <https://docs.pytorch.org/vision/stable/transforms.html>`_. For such transforms, we
-    ensure that:
+    specifically in `torchvision.transforms.v2 <https://docs.pytorch.org/vision/stable/transforms.html>`_.
+    For such transforms, we ensure that:
 
       1. The names are the same.
       2. Default behaviors are the same.
@@ -74,7 +74,7 @@ def _make_transform_spec(self) -> str:
         return f"resize, {self.size[0]}, {self.size[1]}"
 
     def _get_output_dims(self, input_dims: Tuple[int, int]) -> Tuple[int, int]:
-        return self.size
+        return (*self.size,)  # unpack into a new tuple so the result matches the Tuple return annotation; input_dims is unused
 
     @classmethod
     def _from_torchvision(cls, resize_tv: nn.Module):
@@ -102,20 +102,51 @@ def _from_torchvision(cls, resize_tv: nn.Module):
 
 @dataclass
 class RandomCrop(DecoderTransform):
+    """Crop the decoded frame to a given size at a random location in the frame.
+
+    Complementary TorchVision transform: :class:`~torchvision.transforms.v2.RandomCrop`.
+    Padding of all kinds is disabled. The random location within the frame is
+    determined during the initialization of the
+    :class:`~torchcodec.decoders.VideoDecoder` object that owns this transform.
+    As a consequence, each decoded frame in the video will be cropped at the
+    same location. Videos with variable resolution may result in undefined
+    behavior.
+
+    Args:
+        size (sequence of int): Desired output size. Must be a sequence of
+            the form (height, width).
+    """
 
     size: Sequence[int]
     _top: Optional[int] = None
     _left: Optional[int] = None
     _input_dims: Optional[Tuple[int, int]] = None
 
     def _make_transform_spec(self) -> str:
-        assert len(self.size) == 2
+        if len(self.size) != 2:
+            raise ValueError(
+                f"RandomCrop's size must be a sequence of length 2, got {self.size}. "
+                "This should never happen, please report a bug."
+            )
+
         if self._top is None or self._left is None:
-            assert self._input_dims is not None
+            # TODO: It would be very strange if only ONE of those is None. But should we
+            #       make it an error? We can continue, but it would probably mean
+            #       something bad happened. Dear reviewer, please register an opinion here:
+            if self._input_dims is None:
+                raise ValueError(
+                    "RandomCrop's input_dims must be set before calling _make_transform_spec(). "
+                    "This should never happen, please report a bug."
+                )
             if self._input_dims[0] < self.size[0] or self._input_dims[1] < self.size[1]:
                 raise ValueError(
                     f"Input dimensions {input_dims} are smaller than the crop size {self.size}."
                 )
+
+            # Note: This logic must match the logic in
+            #       torchvision.transforms.v2.RandomCrop.make_params(). Given
+            #       the same seed, they should get the same result. This is an
+            #       API guarantee with our users.
             self._top = torch.randint(
                 0, self._input_dims[0] - self.size[0] + 1, size=()
             )
@@ -144,17 +175,16 @@ def _from_torchvision(cls, random_crop_tv: nn.Module, input_dims: Tuple[int, int
                 "TorchVision RandomCrop transform must not specify pad_if_needed."
             )
         if random_crop_tv.fill != 0:
-            raise ValueError("TorchVision RandomCrop must specify fill of 0.")
+            raise ValueError("TorchVision RandomCrop fill must be 0.")
         if random_crop_tv.padding_mode != "constant":
-            raise ValueError(
-                "TorchVision RandomCrop must specify padding_mode of constant."
-            )
+            raise ValueError("TorchVision RandomCrop padding_mode must be constant.")
         if len(random_crop_tv.size) != 2:
             raise ValueError(
                 "TorchVision RandcomCrop transform must have a (height, width) "
                 f"pair for the size, got {random_crop_tv.size}."
             )
         params = random_crop_tv.make_params(
+            # TODO: deal with NCHW versus NHWC; video decoder knows
             torch.empty(size=(3, *input_dims), dtype=torch.uint8)
         )
         assert random_crop_tv.size == (params["height"], params["width"])
diff --git a/test/test_transform_ops.py b/test/test_transform_ops.py
@@ -147,7 +147,7 @@ def test_resize_fails(self):
 
     @pytest.mark.parametrize(
         "height_scaling_factor, width_scaling_factor",
-        ((0.5, 0.5), (0.25, 0.1)),
+        ((0.5, 0.5), (0.25, 0.1), (1.0, 1.0), (0.25, 0.25)),
     )
     @pytest.mark.parametrize("video", [NASA_VIDEO, TEST_SRC_2_720P])
     def test_random_crop_torchvision(
@@ -156,6 +156,9 @@ def test_random_crop_torchvision(
         height = int(video.get_height() * height_scaling_factor)
         width = int(video.get_width() * width_scaling_factor)
 
+        # We want both kinds of RandomCrop objects to arrive at the same
+        # locations to crop, so we need to make sure they get the same random
+        # seed.
         torch.manual_seed(0)
         tc_random_crop = torchcodec.transforms.RandomCrop(size=(height, width))
         decoder_random_crop = VideoDecoder(video.path, transforms=[tc_random_crop])
@@ -188,6 +191,73 @@ def test_random_crop_torchvision(
             expected_shape = (video.get_num_color_channels(), height, width)
             assert frame_random_crop_tv.shape == expected_shape
 
+            frame_full = decoder_full[frame_index]
+            frame_tv = v2.functional.crop(
+                frame_full,
+                top=tc_random_crop._top,
+                left=tc_random_crop._left,
+                height=tc_random_crop.size[0],
+                width=tc_random_crop.size[1],
+            )
+            assert_frames_equal(frame_random_crop, frame_tv)
+
+    def test_crop_fails(self):  # Each unsupported v2.RandomCrop option must raise ValueError when the VideoDecoder is constructed.
+        with pytest.raises(  # Case 1: explicit padding.
+            ValueError,
+            match="must not specify padding",
+        ):
+            VideoDecoder(
+                NASA_VIDEO.path,
+                transforms=[
+                    v2.RandomCrop(
+                        size=(100, 100),
+                        padding=255,
+                    )
+                ],
+            )
+
+        with pytest.raises(  # Case 2: pad_if_needed enabled.
+            ValueError,
+            match="must not specify pad_if_needed",
+        ):
+            VideoDecoder(
+                NASA_VIDEO.path,
+                transforms=[
+                    v2.RandomCrop(
+                        size=(100, 100),
+                        pad_if_needed=True,
+                    )
+                ],
+            )
+
+        with pytest.raises(  # Case 3: non-zero fill value.
+            ValueError,
+            match="fill must be 0",
+        ):
+            VideoDecoder(
+                NASA_VIDEO.path,
+                transforms=[
+                    v2.RandomCrop(
+                        size=(100, 100),
+                        fill=255,
+                    )
+                ],
+            )
+
+        with pytest.raises(  # Case 4: padding_mode other than "constant".
+            ValueError,
+            match="padding_mode must be constant",
+        ):
+            VideoDecoder(
+                NASA_VIDEO.path,
+                transforms=[
+                    v2.RandomCrop(
+                        size=(100, 100),
+                        padding_mode="edge",
+                    )
+                ],
+            )
+
     def test_transform_fails(self):
         with pytest.raises(
             ValueError,