More docs

NicolasHug · NicolasHug · commit e2519c8ce396 · 2024-12-09T11:23:50.000Z
diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
@@ -519,6 +519,9 @@ Developer tools
     :template: function.rst
 
     v2.functional.register_kernel
+    v2.query_size
+    v2.query_chw
+    v2.get_bounding_boxes
 
 
 V1 API Reference
diff --git a/gallery/transforms/plot_custom_transforms.py b/gallery/transforms/plot_custom_transforms.py
@@ -12,7 +12,7 @@
 """
 
 # %%
-from typing import Any, Dict
+from typing import Any, Dict, List
 
 import torch
 from torchvision import tv_tensors
@@ -109,9 +109,12 @@ def forward(self, img, bboxes, label):  # we assume inputs are always structured
 print(f"The transformed bboxes are:\n{structured_output['annotations'][0]}")
 
 # %%
+# Basics: override the ``transform()`` method
+# -------------------------------------------
+#
 # In order to support arbitrary inputs in your custom transform, you will need
 # to inherit from :class:`~torchvision.transforms.v2.Transform` and override the
-# `.transform()` method (not the `forward()` method!).
+# ``.transform()`` method (not the ``forward()`` method!). Below is a basic example:
 
 
 class MyCustomTransform(v2.Transform):
@@ -134,12 +137,63 @@ def transform(self, inpt: Any, params: Dict[str, Any]):
 print(f"The transformed bboxes are:\n{structured_output['annotations'][0]}")
 
 # %%
-# An important thing to note is that when we call `my_custom_transform` on
-# `structured_input`, the input is flattened and then each individual part is
-# passed to `transform()`. That is, `transform()` received the input image, then
-# the bounding boxes, etc. It is then within `transform()` that you can decide
-# how to transform each input, based on their type.
+# An important thing to note is that when we call ``my_custom_transform`` on
+# ``structured_input``, the input is flattened and then each individual part is
+# passed to ``transform()``. That is, ``transform()`` receives the input image,
+# then the bounding boxes, etc. Within ``transform()``, you can decide how to
+# transform each input, based on their type.
+#
+# If you're curious why the other tensor (``torch.arange()``) didn't get passed
+# to ``transform()``, see :ref:`passthrough_heuristic`.
+#
+# Advanced: The ``make_params()`` method
+# --------------------------------------
+#
+# The ``make_params()`` method is called internally before calling
+# ``transform()`` on each input. This is typically useful to generate random
+# parameter values. In the example below, we use it to randomly apply the
+# transformation with a probability of 0.5:
+
+
+class MyRandomTransform(MyCustomTransform):
+    def __init__(self, p=0.5):
+        self.p = p
+        super().__init__()
+
+    def make_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
+        apply_transform = (torch.rand(size=(1,)) < self.p).item()
+        params = dict(apply_transform=apply_transform)
+        return params
+
+    def transform(self, inpt: Any, params: Dict[str, Any]):
+        if not params["apply_transform"]:
+            print("Not transforming anything!")
+            return inpt
+        else:
+            return super().transform(inpt, params)
+
+
+my_random_transform = MyRandomTransform()
+
+torch.manual_seed(0)
+_ = my_random_transform(structured_input)  # transforms
+_ = my_random_transform(structured_input)  # doesn't transform
+
+# %%
+#
+# .. note::
+#
+#     It's important for such random parameter generation to happen within
+#     ``make_params()`` and not within ``transform()``, so that for a given
+#     transform call, the same RNG applies to all the inputs in the same way. If
+#     we were to perform the RNG within ``transform()``, we would risk e.g.
+#     transforming the image while *not* transforming the bounding boxes.
 #
-# If you're curious why the other tensor (`torch.arange()`) didn't get passed to `transform()`, see :ref:`_passthrough_heuristic`.
+# The ``make_params()`` method takes the list of all the inputs as parameter
+# (each of the elements in this list will later be passed to ``transform()``).
+# You can use ``flat_inputs`` to e.g. figure out the dimensions of the input,
+# using :func:`~torchvision.transforms.v2.query_chw` or
+# :func:`~torchvision.transforms.v2.query_size`.
 #
-# TODO explain make_params()
+# ``make_params()`` should return a dict (or actually, anything you want) that
+# will then be passed to ``transform()``.
diff --git a/torchvision/transforms/v2/_utils.py b/torchvision/transforms/v2/_utils.py
@@ -151,6 +151,10 @@ def _parse_labels_getter(labels_getter: Union[str, Callable[[Any], Any], None])
 
 
 def get_bounding_boxes(flat_inputs: List[Any]) -> tv_tensors.BoundingBoxes:
+    """Return the Bounding Boxes in the input.
+
+    Assumes only one ``BoundingBoxes`` object is present.
+    """
     # This assumes there is only one bbox per sample as per the general convention
     try:
         return next(inpt for inpt in flat_inputs if isinstance(inpt, tv_tensors.BoundingBoxes))
@@ -159,9 +163,7 @@ def get_bounding_boxes(flat_inputs: List[Any]) -> tv_tensors.BoundingBoxes:
 
 
 def query_chw(flat_inputs: List[Any]) -> Tuple[int, int, int]:
-    print("AEFAEFAE")
-    print(len(flat_inputs))
-    print([type(inpt) for inpt in flat_inputs])
+    """Return Channel, Height, and Width."""
     chws = {
         tuple(get_dimensions(inpt))
         for inpt in flat_inputs
@@ -176,6 +178,7 @@ def query_chw(flat_inputs: List[Any]) -> Tuple[int, int, int]:
 
 
 def query_size(flat_inputs: List[Any]) -> Tuple[int, int]:
+    """Return Height and Width."""
     sizes = {
         tuple(get_size(inpt))
         for inpt in flat_inputs