Better doc strings

scotts · scotts · commit e7367421cca7 · 2025-12-03T08:51:35.000-08:00
diff --git a/src/torchcodec/transforms/_decoder_transforms.py b/src/torchcodec/transforms/_decoder_transforms.py
@@ -41,12 +41,43 @@ class DecoderTransform(ABC):
     def _make_transform_spec(
         self, input_dims: Tuple[Optional[int], Optional[int]]
     ) -> str:
+        """Makes the transform spec that is used by the `VideoDecoder`.
+
+        Args:
+            input_dims (Tuple[Optional[int], Optional[int]]): The dimensions of
+                the input frame in the form (height, width). We cannot know the
+                dimensions at object construction time because it's dependent on
+                the video being decoded and upstream transforms in the same
+                transform pipeline. Not all transforms need to know this; those
+                that don't will ignore it. The individual values in the tuple are
+                optional because the original values come from file metadata which
+                may be missing. We maintain the optionality throughout the APIs so
+                that we can decide as late as possible that it's necessary for the
+                values to exist. That is, if the values are missing from the
+                metadata and we have transforms which ignore the input dimensions,
+                we want that to still work.
+
+                Note: This method is the moral equivalent of TorchVision's
+                `Transformer.make_params()`.
+
+        Returns:
+            str: A string which contains the spec for the transform that the
+                `VideoDecoder` knows what to do with.
+        """
         pass
 
-    # Transforms that change the dimensions of their input frame return a value.
-    # Transforms that don't return None; they can rely on this default
-    # implementation.
     def _get_output_dims(self) -> Optional[Tuple[Optional[int], Optional[int]]]:
+        """Get the dimensions of the output frame.
+
+        Transforms that change the frame dimensions need to override this
+        method. Transforms that don't change the frame dimensions can rely on
+        this default implementation.
+
+        Returns:
+            Optional[Tuple[Optional[int], Optional[int]]]: The output dimensions.
+                - None: The output dimensions are the same as the input dimensions.
+                - (int, int): The (height, width) of the output frame.
+        """
         return None
 
 
@@ -68,7 +99,7 @@ class Resize(DecoderTransform):
     Interpolation is always bilinear. Anti-aliasing is always on.
 
     Args:
-        size: (sequence of int): Desired output size. Must be a sequence of
+        size (Sequence[int]): Desired output size. Must be a sequence of
             the form (height, width).
     """
 
@@ -117,13 +148,13 @@ class RandomCrop(DecoderTransform):
     Complementary TorchVision transform: :class:`~torchvision.transforms.v2.RandomCrop`.
     Padding of all kinds is disabled. The random location within the frame is
     determined during the initialization of the
-    :class:~`torchcodec.decoders.VideoDecoder` object that owns this transform.
+    :class:`~torchcodec.decoders.VideoDecoder` object that owns this transform.
     As a consequence, each decoded frame in the video will be cropped at the
     same location. Videos with variable resolution may result in undefined
     behavior.
 
     Args:
-        size: (sequence of int): Desired output size. Must be a sequence of
+        size (Sequence[int]): Desired output size. Must be a sequence of
             the form (height, width).
     """