From ebfdea27b42a8c1effaa999e189da88cb69f97b8 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 6 Nov 2024 12:35:53 +0000
Subject: [PATCH 1/6] Add docstrings for samplers

---
 docs/source/api_ref_samplers.rst        | 18 ++++++
 docs/source/index.rst                   |  1 +
 src/torchcodec/_frame.py                |  5 +-
 src/torchcodec/samplers/_common.py      | 10 ++++
 src/torchcodec/samplers/_index_based.py | 56 +++++++++++++++++-
 src/torchcodec/samplers/_time_based.py  | 77 +++++++++++++++++++++++++
 6 files changed, 165 insertions(+), 2 deletions(-)
 create mode 100644 docs/source/api_ref_samplers.rst

diff --git a/docs/source/api_ref_samplers.rst b/docs/source/api_ref_samplers.rst
new file mode 100644
index 000000000..9c7f8029e
--- /dev/null
+++ b/docs/source/api_ref_samplers.rst
@@ -0,0 +1,18 @@
+.. _samplers:
+
+===================
+torchcodec.samplers
+===================
+
+.. currentmodule:: torchcodec.samplers
+
+
+.. autosummary::
+    :toctree: generated/
+    :nosignatures:
+    :template: function.rst
+
+    clips_at_regular_indices
+    clips_at_random_indices
+    clips_at_regular_timestamps
+    clips_at_random_timestamps
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 1ce569f3a..d7011e245 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -75,3 +75,4 @@ We achieve these capabilities through:
 
    api_ref_torchcodec
    api_ref_decoders
+   api_ref_samplers
diff --git a/src/torchcodec/_frame.py b/src/torchcodec/_frame.py
index fd792cebc..3a9b9be6b 100644
--- a/src/torchcodec/_frame.py
+++ b/src/torchcodec/_frame.py
@@ -56,7 +56,10 @@ def __repr__(self):
 
 @dataclass
 class FrameBatch(Iterable):
-    """Multiple video frames with associated metadata."""
+    """Multiple video frames with associated metadata.
+
+    TODO: correctly document dimensions.
+    """
 
     data: Tensor
     """The frames data as (4-D ``torch.Tensor``)."""
diff --git a/src/torchcodec/samplers/_common.py b/src/torchcodec/samplers/_common.py
index abf42ffff..5ab795a5f 100644
--- a/src/torchcodec/samplers/_common.py
+++ b/src/torchcodec/samplers/_common.py
@@ -69,3 +69,13 @@ def _reshape_4d_framebatch_into_5d(
         pts_seconds=frames.pts_seconds.view(num_clips, num_frames_per_clip),
         duration_seconds=frames.duration_seconds.view(num_clips, num_frames_per_clip),
     )
+
+
+_FRAMEBATCH_RETURN_DOCS = """
+    Returns:
+        FrameBatch:
+            The sampled clips, as a 5D :class:`~torchcodec.FrameBatch`.
+            The shape of the ``data`` field is (``num_clips``,
+            ``num_frames_per_clips``, ...) where ... is (H, W, C) or (C, H, W)
+            depending on the decoder options.
+"""
diff --git a/src/torchcodec/samplers/_index_based.py b/src/torchcodec/samplers/_index_based.py
index d528f8019..5fc0d8a54 100644
--- a/src/torchcodec/samplers/_index_based.py
+++ b/src/torchcodec/samplers/_index_based.py
@@ -5,6 +5,7 @@
 from torchcodec import FrameBatch
 from torchcodec.decoders import VideoDecoder
 from torchcodec.samplers._common import (
+    _FRAMEBATCH_RETURN_DOCS,
     _POLICY_FUNCTION_TYPE,
     _POLICY_FUNCTIONS,
     _reshape_4d_framebatch_into_5d,
@@ -216,7 +217,6 @@ def clips_at_regular_indices(
     sampling_range_end: Optional[int] = None,  # interval is [start, end).
     policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
 ) -> FrameBatch:
-
     return _generic_index_based_sampler(
         kind="regular",
         decoder=decoder,
@@ -227,3 +227,57 @@ def clips_at_regular_indices(
         sampling_range_end=sampling_range_end,
         policy=policy,
     )
+
+
+_COMMON_DOCS = f"""
+    Args:
+        decoder (VideoDecoder): The :class:`~torchcodec.decoders.VideoDecoder`
+            instance to sample clips from.
+        num_clips (int, optional): The number of clips to sample. Default: 1.
+        num_frames_per_clip (int, optional): The number of frames per clips. Default: 1.
+        num_indices_between_frames(int, optional): The number of indices between
+            the frames *within* a clip. Default: 1, which means frames are
+            consecutive. This is sometimes referred to as "dilation".
+        sampling_range_start (int, optional): The start of the sampling range,
+            which defines the first index that a clip may *start* at. Default:
+            0, i.e. the start of the video.
+        sampling_range_end (int or None, optional): The end of the sampling
+            range, which defines the last index that a clip may *start* at. This
+            value is exclusive, i.e. a clip may only start within
+            [``sampling_range_start``, ``sampling_range_end``). If None
+            (default), the value is set automatically such that the clips never
+            span beyond the end of the video. For example if the last valid
+            index in a video is 99 and the clips span 10 frames, this value is
+            set to 99 - 10 + 1 = 90. Negative values are accepted and are
+            equivalent to ``len(video) + val``. When a clip spans beyond the end
+            of the video, the ``policy`` parameter defines how to construct such
+            clip.
+        policy (str, optional): Defines how to construct clips that span beyond
+            the end of the video. This is best described with an example:
+            assuming the last valid index in a video is 99, and a clip was
+            sampled to start at index 95, with ``num_frames_per_clip=5`` and
+            ``num_indices_between_frames=2``, the indices of the frames in the
+            clip are supposed to be [95, 97, 99, 101, 103]. But 101 and 103 are
+            invalid indices, so the ``policy`` parameter defines how to replace
+            those frames, with valid indices:
+
+            - "repeat": repeats the last valid frame of the clip. We would get
+              [95, 97, 99, 99, 99].
+            - "wrap": wraps around to the beginning of the clip. We would get
+              [95, 97, 99, 95, 97].
+            - "error": raises an error.
+
+            Default is "repeat". Note that when ``sampling_range_end=None``
+            (default), this policy parameter is unlikely to be relevant.
+
+    {_FRAMEBATCH_RETURN_DOCS}
+"""
+
+clips_at_random_indices.__doc__ = f"""Sample clips at random indices.
+{_COMMON_DOCS}
+"""
+
+
+clips_at_regular_indices.__doc__ = f"""Sample clips at regular (equally-spaced) indices.
+{_COMMON_DOCS}
+"""
diff --git a/src/torchcodec/samplers/_time_based.py b/src/torchcodec/samplers/_time_based.py
index 888fd52a1..96b61fd0a 100644
--- a/src/torchcodec/samplers/_time_based.py
+++ b/src/torchcodec/samplers/_time_based.py
@@ -263,3 +263,80 @@ def clips_at_regular_timestamps(
         sampling_range_end=sampling_range_end,
         policy=policy,
     )
+
+
+_COMMON_DOCS = """
+    {maybe_note}
+
+    Args:
+        decoder (VideoDecoder): The :class:`~torchcodec.decoders.VideoDecoder`
+            instance to sample clips from.
+        {num_clips_or_seconds_between_clip_starts}
+        num_frames_per_clip (int, optional): The number of frames per clip. Default: 1.
+        seconds_between_frames (float or None, optional): The time (in seconds)
+            between each frame within a clip. More accurately, this defines the
+            time between the *frame sampling points*, i.e. the timestamps at
+            which we sample the frames. Because frames span intervals in time,
+            the resulting start of frames within a clip may not be exactly
+            spaced by ``seconds_between_frames`` - but on average, they will be.
+            Default is None, which is set to the average frame duration
+            (``1/average_fps``).
+        sampling_range_start (float or None, optional): The start of the
+            sampling range, which defines the first timestamp (in seconds) that
+            a clip may *start* at. Default: None, which corresponds to the start
+            of the video. (Note: some videos start at negative values, which is
+            why the default is not 0).
+        sampling_range_end (float or None, optional): The end of the sampling
+            range, which defines the last timestamp (in seconds) that a clip may
+            *start* at. This value is exclusive, i.e. a clip may only start within
+            [``sampling_range_start``, ``sampling_range_end``). If None
+            (default), the value is set automatically such that the clips never
+            span beyond the end of the video, i.e. it is set to
+            ``end_video_seconds - (num_frames_per_clip - 1) *
+            seconds_between_frames``. When a clip spans beyond the end of the
+            video, the ``policy`` parameter defines how to construct such clip.
+        policy (str, optional): Defines how to construct clips that span beyond
+            the end of the video. This is best described with an example:
+            assuming the last valid (seekable) timestamp in a video is 10.9, and
+            a clip was sampled to start at timestamp 10.5, with
+            ``num_frames_per_clip=5`` and ``seconds_between_frames=0.2``, the
+            sampling timestamps of the frames in the clip are supposed to be
+            [10.5, 10.7, 10.9, 11.1, 11.3]. But 11.1 and 11.3 are invalid
+            timestamps, so the ``policy`` parameter defines how to replace those
+            frames, with valid sampling timestamps:
+
+            - "repeat": repeats the last valid frame of the clip. We would get
+              frames sampled at timestamps [10.5, 10.7, 10.9, 10.9, 10.9].
+            - "wrap": wraps around to the beginning of the clip. We would get
+              frames sampled at timestamps [10.5, 10.7, 10.9, 10.5, 10.7].
+            - "error": raises an error.
+
+            Default is "repeat". Note that when ``sampling_range_end=None``
+            (default), this policy parameter is unlikely to be relevant.
+"""
+
+
+_NUM_CLIPS_DOCS = """
+        num_clips (int, optional): The number of clips to sample. Default: 1.
+"""
+clips_at_random_timestamps.__doc__ = f"""Sample clips at random timestamps.
+{_COMMON_DOCS.format(maybe_note="", num_clips_or_seconds_between_clip_starts=_NUM_CLIPS_DOCS)}
+"""
+
+
+_SECONDS_BETWEEN_CLIP_STARTS = """
+        seconds_between_clip_starts (float): The space (in seconds) between each
+            clip start.
+"""
+
+_NOTE_DOCS = """
+    .. note::
+        For consistency with existing sampling APIs (such as torchvision), this
+        sampler takes a ``seconds_between_clip_starts`` parameter instead of
+        ``num_clips``. If you find that supporting ``num_clips`` would be
+        useful, please let us know by `opening a feature request
+        <https://github.com/pytorch/torchcodec/issues?q=is:open+is:issue>`_.
+"""
+clips_at_regular_timestamps.__doc__ = f"""Sample clips at regular (equally-spaced) timestamps.
+{_COMMON_DOCS.format(maybe_note=_NOTE_DOCS, num_clips_or_seconds_between_clip_starts=_SECONDS_BETWEEN_CLIP_STARTS)}
+"""

From 401cff5b55b2d76d5c4b5dc63c1a462bb5f0b070 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 6 Nov 2024 13:41:59 +0000
Subject: [PATCH 2/6] Forgot to document FrameBatch / clips

---
 docs/source/glossary.rst                |  7 +++++++
 src/torchcodec/_frame.py                | 11 +++++++----
 src/torchcodec/samplers/_common.py      |  6 ++++--
 src/torchcodec/samplers/_index_based.py |  4 ++--
 src/torchcodec/samplers/_time_based.py  | 11 +++++++----
 5 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/docs/source/glossary.rst b/docs/source/glossary.rst
index 18baa7e5b..4836eae57 100644
--- a/docs/source/glossary.rst
+++ b/docs/source/glossary.rst
@@ -17,3 +17,10 @@ Glossary
        A scan corresponds to an entire pass over a video file, with the purpose
        of retrieving metadata about the different streams and frames. **It does
        not involve decoding**, so it is a lot cheaper than decoding the file.
+
+    clips
+        A clip is a sequence of frames, usually in presentation order. The
+        frames may not necessarily be consecutive. A clip is represented as a 4D
+        :class:`~torchcodec.FrameBatch`. A group of clips, which is what the
+        :ref:`samplers <samplers>` return, is represented as 5D
+        :class:`~torchcodec.FrameBatch`.
diff --git a/src/torchcodec/_frame.py b/src/torchcodec/_frame.py
index 3a9b9be6b..dc46aa2aa 100644
--- a/src/torchcodec/_frame.py
+++ b/src/torchcodec/_frame.py
@@ -58,15 +58,18 @@ def __repr__(self):
 class FrameBatch(Iterable):
     """Multiple video frames with associated metadata.
 
-    TODO: correctly document dimensions.
+    The ``data`` tensor is typically 4D for sequences of frames (NHWC or NCHW),
+    or 5D for sequences of clips, as returned by the :ref:`samplers <samplers>`.
+    When ``data`` is 4D (resp. 5D) the ``pts_seconds`` and ``duration_seconds``
+    tensors are 1D (resp. 2D).
     """
 
     data: Tensor
-    """The frames data as (4-D ``torch.Tensor``)."""
+    """The frames data (``torch.Tensor`` of uint8)."""
     pts_seconds: Tensor
-    """The :term:`pts` of the frame, in seconds (1-D ``torch.Tensor`` of floats)."""
+    """The :term:`pts` of the frame, in seconds (``torch.Tensor`` of floats)."""
     duration_seconds: Tensor
-    """The duration of the frame, in seconds (1-D ``torch.Tensor`` of floats)."""
+    """The duration of the frame, in seconds (``torch.Tensor`` of floats)."""
 
     def __post_init__(self):
         # This is called after __init__() when a FrameBatch is created. We can
diff --git a/src/torchcodec/samplers/_common.py b/src/torchcodec/samplers/_common.py
index 5ab795a5f..609d8efbe 100644
--- a/src/torchcodec/samplers/_common.py
+++ b/src/torchcodec/samplers/_common.py
@@ -74,8 +74,10 @@ def _reshape_4d_framebatch_into_5d(
 _FRAMEBATCH_RETURN_DOCS = """
     Returns:
         FrameBatch:
-            The sampled clips, as a 5D :class:`~torchcodec.FrameBatch`.
+            The sampled :term:`clips`, as a 5D :class:`~torchcodec.FrameBatch`.
             The shape of the ``data`` field is (``num_clips``,
             ``num_frames_per_clips``, ...) where ... is (H, W, C) or (C, H, W)
-            depending on the decoder options.
+            depending on the decoder options. The shape of the ``pts_seconds``
+            and ``duration_seconds`` fields is (``num_clips``,
+            ``num_frames_per_clips``)
 """
diff --git a/src/torchcodec/samplers/_index_based.py b/src/torchcodec/samplers/_index_based.py
index 5fc0d8a54..fd530bc0d 100644
--- a/src/torchcodec/samplers/_index_based.py
+++ b/src/torchcodec/samplers/_index_based.py
@@ -273,11 +273,11 @@ def clips_at_regular_indices(
     {_FRAMEBATCH_RETURN_DOCS}
 """
 
-clips_at_random_indices.__doc__ = f"""Sample clips at random indices.
+clips_at_random_indices.__doc__ = f"""Sample :term:`clips` at random indices.
 {_COMMON_DOCS}
 """
 
 
-clips_at_regular_indices.__doc__ = f"""Sample clips at regular (equally-spaced) indices.
+clips_at_regular_indices.__doc__ = f"""Sample :term:`clips` at regular (equally-spaced) indices.
 {_COMMON_DOCS}
 """
diff --git a/src/torchcodec/samplers/_time_based.py b/src/torchcodec/samplers/_time_based.py
index 96b61fd0a..44eb5488e 100644
--- a/src/torchcodec/samplers/_time_based.py
+++ b/src/torchcodec/samplers/_time_based.py
@@ -4,6 +4,7 @@
 
 from torchcodec import FrameBatch
 from torchcodec.samplers._common import (
+    _FRAMEBATCH_RETURN_DOCS,
     _POLICY_FUNCTION_TYPE,
     _POLICY_FUNCTIONS,
     _reshape_4d_framebatch_into_5d,
@@ -313,14 +314,16 @@ def clips_at_regular_timestamps(
 
             Default is "repeat". Note that when ``sampling_range_end=None``
             (default), this policy parameter is unlikely to be relevant.
+
+    {return_docs}
 """
 
 
 _NUM_CLIPS_DOCS = """
         num_clips (int, optional): The number of clips to sample. Default: 1.
 """
-clips_at_random_timestamps.__doc__ = f"""Sample clips at random timestamps.
-{_COMMON_DOCS.format(maybe_note="", num_clips_or_seconds_between_clip_starts=_NUM_CLIPS_DOCS)}
+clips_at_random_timestamps.__doc__ = f"""Sample :term:`clips` at random timestamps.
+{_COMMON_DOCS.format(maybe_note="", num_clips_or_seconds_between_clip_starts=_NUM_CLIPS_DOCS, return_docs=_FRAMEBATCH_RETURN_DOCS)}
 """
 
 
@@ -337,6 +340,6 @@ def clips_at_regular_timestamps(
         useful, please let us know by `opening a feature request
         <https://github.com/pytorch/torchcodec/issues?q=is:open+is:issue>`_.
 """
-clips_at_regular_timestamps.__doc__ = f"""Sample clips at regular (equally-spaced) timestamps.
-{_COMMON_DOCS.format(maybe_note=_NOTE_DOCS, num_clips_or_seconds_between_clip_starts=_SECONDS_BETWEEN_CLIP_STARTS)}
+clips_at_regular_timestamps.__doc__ = f"""Sample :term:`clips` at regular (equally-spaced) timestamps.
+{_COMMON_DOCS.format(maybe_note=_NOTE_DOCS, num_clips_or_seconds_between_clip_starts=_SECONDS_BETWEEN_CLIP_STARTS, return_docs=_FRAMEBATCH_RETURN_DOCS)}
 """

From 5a0f14c61093b78c21b6ca18f10f03e322fe3c3f Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 6 Nov 2024 14:27:53 +0000
Subject: [PATCH 3/6] address comments

---
 src/torchcodec/samplers/_common.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/torchcodec/samplers/_common.py b/src/torchcodec/samplers/_common.py
index 609d8efbe..a129a4483 100644
--- a/src/torchcodec/samplers/_common.py
+++ b/src/torchcodec/samplers/_common.py
@@ -77,7 +77,8 @@ def _reshape_4d_framebatch_into_5d(
             The sampled :term:`clips`, as a 5D :class:`~torchcodec.FrameBatch`.
             The shape of the ``data`` field is (``num_clips``,
             ``num_frames_per_clips``, ...) where ... is (H, W, C) or (C, H, W)
-            depending on the decoder options. The shape of the ``pts_seconds``
-            and ``duration_seconds`` fields is (``num_clips``,
-            ``num_frames_per_clips``)
+            depending on the ``dimension_order`` parameter of
+            :class:`~torchcodec.decoders.VideoDecoder`. The shape of the
+            ``pts_seconds`` and ``duration_seconds`` fields is (``num_clips``,
+            ``num_frames_per_clips``).
 """

From 69686cbfb570e40c467ba3b6efc6438e2cf53ab2 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 6 Nov 2024 14:34:37 +0000
Subject: [PATCH 4/6] Fix 'repeat_last' doc and associated type annotation

---
 src/torchcodec/samplers/_index_based.py |  6 +++---
 src/torchcodec/samplers/_time_based.py  | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/torchcodec/samplers/_index_based.py b/src/torchcodec/samplers/_index_based.py
index fd530bc0d..c3d001d86 100644
--- a/src/torchcodec/samplers/_index_based.py
+++ b/src/torchcodec/samplers/_index_based.py
@@ -261,13 +261,13 @@ def clips_at_regular_indices(
             invalid indices, so the ``policy`` parameter defines how to replace
             those frames, with valid indices:
 
-            - "repeat": repeats the last valid frame of the clip. We would get
-              [95, 97, 99, 99, 99].
+            - "repeat_last": repeats the last valid frame of the clip. We would
+              get [95, 97, 99, 99, 99].
             - "wrap": wraps around to the beginning of the clip. We would get
               [95, 97, 99, 95, 97].
             - "error": raises an error.
 
-            Default is "repeat". Note that when ``sampling_range_end=None``
+            Default is "repeat_last". Note that when ``sampling_range_end=None``
             (default), this policy parameter is unlikely to be relevant.
 
     {_FRAMEBATCH_RETURN_DOCS}
diff --git a/src/torchcodec/samplers/_time_based.py b/src/torchcodec/samplers/_time_based.py
index 44eb5488e..303e6b595 100644
--- a/src/torchcodec/samplers/_time_based.py
+++ b/src/torchcodec/samplers/_time_based.py
@@ -157,7 +157,7 @@ def _generic_time_based_sampler(
     # None means "begining", which may not always be 0
     sampling_range_start: Optional[float],
     sampling_range_end: Optional[float],  # interval is [start, end).
-    policy: str = "repeat_last",
+    policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
 ) -> FrameBatch:
     # Note: *everywhere*, sampling_range_end denotes the upper bound of where a
     # clip can start. This is an *open* upper bound, i.e. we will make sure no
@@ -227,7 +227,7 @@ def clips_at_random_timestamps(
     # None means "begining", which may not always be 0
     sampling_range_start: Optional[float] = None,
     sampling_range_end: Optional[float] = None,  # interval is [start, end).
-    policy: str = "repeat_last",
+    policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
 ) -> FrameBatch:
     return _generic_time_based_sampler(
         kind="random",
@@ -251,7 +251,7 @@ def clips_at_regular_timestamps(
     # None means "begining", which may not always be 0
     sampling_range_start: Optional[float] = None,
     sampling_range_end: Optional[float] = None,  # interval is [start, end).
-    policy: str = "repeat_last",
+    policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
 ) -> FrameBatch:
     return _generic_time_based_sampler(
         kind="regular",
@@ -306,13 +306,13 @@ def clips_at_regular_timestamps(
             timestamps, so the ``policy`` parameter defines how to replace those
             frames, with valid sampling timestamps:
 
-            - "repeat": repeats the last valid frame of the clip. We would get
-              frames sampled at timestamps [10.5, 10.7, 10.9, 10.9, 10.9].
+            - "repeat_last": repeats the last valid frame of the clip. We would
+              get frames sampled at timestamps [10.5, 10.7, 10.9, 10.9, 10.9].
             - "wrap": wraps around to the beginning of the clip. We would get
               frames sampled at timestamps [10.5, 10.7, 10.9, 10.5, 10.7].
             - "error": raises an error.
 
-            Default is "repeat". Note that when ``sampling_range_end=None``
+            Default is "repeat_last". Note that when ``sampling_range_end=None``
             (default), this policy parameter is unlikely to be relevant.
 
     {return_docs}

From 4908e9f481c36c8b472bb5e97fd21552e2ad720c Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 6 Nov 2024 14:35:11 +0000
Subject: [PATCH 5/6] Add comment to point to actual docstring

---
 src/torchcodec/samplers/_index_based.py | 2 ++
 src/torchcodec/samplers/_time_based.py  | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/torchcodec/samplers/_index_based.py b/src/torchcodec/samplers/_index_based.py
index c3d001d86..a134c7249 100644
--- a/src/torchcodec/samplers/_index_based.py
+++ b/src/torchcodec/samplers/_index_based.py
@@ -195,6 +195,7 @@ def clips_at_random_indices(
     sampling_range_end: Optional[int] = None,  # interval is [start, end).
     policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
 ) -> FrameBatch:
+    # See docstring below
     return _generic_index_based_sampler(
         kind="random",
         decoder=decoder,
@@ -217,6 +218,7 @@ def clips_at_regular_indices(
     sampling_range_end: Optional[int] = None,  # interval is [start, end).
     policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
 ) -> FrameBatch:
+    # See docstring below
     return _generic_index_based_sampler(
         kind="regular",
         decoder=decoder,
diff --git a/src/torchcodec/samplers/_time_based.py b/src/torchcodec/samplers/_time_based.py
index 303e6b595..818b60dc8 100644
--- a/src/torchcodec/samplers/_time_based.py
+++ b/src/torchcodec/samplers/_time_based.py
@@ -229,6 +229,7 @@ def clips_at_random_timestamps(
     sampling_range_end: Optional[float] = None,  # interval is [start, end).
     policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
 ) -> FrameBatch:
+    # See docstring below
     return _generic_time_based_sampler(
         kind="random",
         decoder=decoder,
@@ -253,6 +254,7 @@ def clips_at_regular_timestamps(
     sampling_range_end: Optional[float] = None,  # interval is [start, end).
     policy: Literal["repeat_last", "wrap", "error"] = "repeat_last",
 ) -> FrameBatch:
+    # See docstring below
     return _generic_time_based_sampler(
         kind="regular",
         decoder=decoder,

From 0b1ef0af6074534ccf3ae0233c4ad68587384b5f Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Wed, 6 Nov 2024 15:31:12 +0000
Subject: [PATCH 6/6] Address comments

---
 docs/source/glossary.rst                | 4 ++--
 src/torchcodec/samplers/_index_based.py | 2 +-
 src/torchcodec/samplers/_time_based.py  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/source/glossary.rst b/docs/source/glossary.rst
index 4836eae57..0d648b9b6 100644
--- a/docs/source/glossary.rst
+++ b/docs/source/glossary.rst
@@ -19,8 +19,8 @@ Glossary
        not involve decoding**, so it is a lot cheaper than decoding the file.
 
     clips
-        A clip is a sequence of frames, usually in presentation order. The
-        frames may not necessarily be consecutive. A clip is represented as a 4D
+        A clip is a sequence of frames, usually in :term:`pts` order. The frames
+        may not necessarily be consecutive. A clip is represented as a 4D
         :class:`~torchcodec.FrameBatch`. A group of clips, which is what the
         :ref:`samplers <samplers>` return, is represented as 5D
         :class:`~torchcodec.FrameBatch`.
diff --git a/src/torchcodec/samplers/_index_based.py b/src/torchcodec/samplers/_index_based.py
index a134c7249..a81fa645e 100644
--- a/src/torchcodec/samplers/_index_based.py
+++ b/src/torchcodec/samplers/_index_based.py
@@ -235,7 +235,7 @@ def clips_at_regular_indices(
     Args:
         decoder (VideoDecoder): The :class:`~torchcodec.decoders.VideoDecoder`
             instance to sample clips from.
-        num_clips (int, optional): The number of clips to sample. Default: 1.
+        num_clips (int, optional): The number of clips to return. Default: 1.
         num_frames_per_clip (int, optional): The number of frames per clips. Default: 1.
         num_indices_between_frames(int, optional): The number of indices between
             the frames *within* a clip. Default: 1, which means frames are
diff --git a/src/torchcodec/samplers/_time_based.py b/src/torchcodec/samplers/_time_based.py
index 818b60dc8..2b531e53d 100644
--- a/src/torchcodec/samplers/_time_based.py
+++ b/src/torchcodec/samplers/_time_based.py
@@ -322,7 +322,7 @@ def clips_at_regular_timestamps(
 
 
 _NUM_CLIPS_DOCS = """
-        num_clips (int, optional): The number of clips to sample. Default: 1.
+        num_clips (int, optional): The number of clips to return. Default: 1.
 """
 clips_at_random_timestamps.__doc__ = f"""Sample :term:`clips` at random timestamps.
 {_COMMON_DOCS.format(maybe_note="", num_clips_or_seconds_between_clip_starts=_NUM_CLIPS_DOCS, return_docs=_FRAMEBATCH_RETURN_DOCS)}