From 08804933e1dd8009494b56eb52dee2af8682d1aa Mon Sep 17 00:00:00 2001 From: N00bcak Date: Fri, 9 Aug 2024 22:06:35 +0800 Subject: [PATCH 1/2] Draft for better `write_video` documentation --- torchvision/io/video.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/torchvision/io/video.py b/torchvision/io/video.py index c8f7d2ebde2..ef3b756f887 100644 --- a/torchvision/io/video.py +++ b/torchvision/io/video.py @@ -62,7 +62,12 @@ def write_video( audio_options: Optional[Dict[str, Any]] = None, ) -> None: """ - Writes a 4d tensor in [T, H, W, C] format in a video file + Writes a 4d tensor in [T, H, W, C] format in a video file. + The default parameters (i.e. `fps`, `audio_fps`) return videos + of a fixed quality & compressing speed, and may not necessarily be suitable for all applications. + Since torchvision relies on `PyAV` (therefore, ultimately `FFmpeg`) to encode videos, + you can get more fine-grained control by referring to the other options at + your disposal within `the FFMpeg wiki `_. Args: filename (str): path where the video will be saved @@ -70,12 +75,25 @@ def write_video( as a uint8 tensor in [T, H, W, C] format fps (Number): video frames per second video_codec (str): the name of the video codec, i.e. "libx264", "h264", etc. - options (Dict): dictionary containing options to be passed into the PyAV video stream + options (Dict): dictionary containing options to be passed into the PyAV video stream. + The list of options is codec-dependent and can all + be found from `the FFMpeg wiki `_. audio_array (Tensor[C, N]): tensor containing the audio, where C is the number of channels and N is the number of samples audio_fps (Number): audio sample rate, typically 44100 or 48000 audio_codec (str): the name of the audio codec, i.e. "mp3", "aac", etc. 
- audio_options (Dict): dictionary containing options to be passed into the PyAV audio stream + audio_options (Dict): dictionary containing options to be passed into the PyAV audio stream. + The list of options is codec-dependent and can all + be found from `the FFMpeg wiki `_. + + Examples:: + >>> # Creating libx264 video with CRF 17, for visually lossless footage: + >>> + >>> from torchvision.io import write_video + >>> # 1000 frames of 100x100, 3-channel image. + >>> vid = torch.randn(1000, 100, 100, 3, dtype = torch.uint8) + >>> write_video("video.mp4", options = {"crf": "17"}) + """ if not torch.jit.is_scripting() and not torch.jit.is_tracing(): _log_api_usage_once(write_video) From 585a3fedabc6d9ba448fd51058b692f74f623093 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 11 Oct 2024 12:32:19 +0100 Subject: [PATCH 2/2] nit --- torchvision/io/video.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/torchvision/io/video.py b/torchvision/io/video.py index ef3b756f887..a3d23ee5ad8 100644 --- a/torchvision/io/video.py +++ b/torchvision/io/video.py @@ -63,11 +63,11 @@ def write_video( ) -> None: """ Writes a 4d tensor in [T, H, W, C] format in a video file. - The default parameters (i.e. `fps`, `audio_fps`) return videos - of a fixed quality & compressing speed, and may not necessarily be suitable for all applications. - Since torchvision relies on `PyAV` (therefore, ultimately `FFmpeg`) to encode videos, - you can get more fine-grained control by referring to the other options at - your disposal within `the FFMpeg wiki `_. + + This function relies on PyAV (therefore, ultimately FFmpeg) to encode + videos, you can get more fine-grained control by referring to the other + options at your disposal within `the FFMpeg wiki + `_. Args: filename (str): path where the video will be saved @@ -85,15 +85,15 @@ def write_video( audio_options (Dict): dictionary containing options to be passed into the PyAV audio stream. 
The list of options is codec-dependent and can all be found from `the FFMpeg wiki `_. - + Examples:: >>> # Creating libx264 video with CRF 17, for visually lossless footage: >>> >>> from torchvision.io import write_video >>> # 1000 frames of 100x100, 3-channel image. - >>> vid = torch.randn(1000, 100, 100, 3, dtype = torch.uint8) + >>> vid = torch.randint(0, 256, (1000, 100, 100, 3), dtype=torch.uint8) >>> write_video("video.mp4", options = {"crf": "17"}) - + """ if not torch.jit.is_scripting() and not torch.jit.is_tracing(): _log_api_usage_once(write_video)