diff --git a/torchvision/io/video.py b/torchvision/io/video.py index 73c97f37e29..9f768ed555d 100644 --- a/torchvision/io/video.py +++ b/torchvision/io/video.py @@ -62,7 +62,12 @@ def write_video( audio_options: Optional[Dict[str, Any]] = None, ) -> None: """ - Writes a 4d tensor in [T, H, W, C] format in a video file + Writes a 4d tensor in [T, H, W, C] format in a video file. + + This function relies on PyAV (therefore, ultimately FFmpeg) to encode + videos. You can get more fine-grained control by referring to the other + options at your disposal within `the FFmpeg wiki + <http://trac.ffmpeg.org/wiki#Encoding>`_. .. warning:: @@ -78,12 +83,25 @@ def write_video( as a uint8 tensor in [T, H, W, C] format fps (Number): video frames per second video_codec (str): the name of the video codec, i.e. "libx264", "h264", etc. - options (Dict): dictionary containing options to be passed into the PyAV video stream + options (Dict): dictionary containing options to be passed into the PyAV video stream. + The available options are codec-dependent and can all + be found on `the FFmpeg wiki <http://trac.ffmpeg.org/wiki#Encoding>`_. audio_array (Tensor[C, N]): tensor containing the audio, where C is the number of channels and N is the number of samples audio_fps (Number): audio sample rate, typically 44100 or 48000 audio_codec (str): the name of the audio codec, i.e. "mp3", "aac", etc. - audio_options (Dict): dictionary containing options to be passed into the PyAV audio stream + audio_options (Dict): dictionary containing options to be passed into the PyAV audio stream. + The available options are codec-dependent and can all + be found on `the FFmpeg wiki <http://trac.ffmpeg.org/wiki#Encoding>`_. + + Examples:: + >>> # Creating libx264 video with CRF 17, for visually lossless footage: + >>> + >>> from torchvision.io import write_video + >>> # 1000 frames of 100x100, 3-channel image. 
+ >>> vid = torch.randint(0, 256, (1000, 100, 100, 3), dtype=torch.uint8) + >>> write_video("video.mp4", vid, fps=30, options={"crf": "17"}) + """ if not torch.jit.is_scripting() and not torch.jit.is_tracing(): _log_api_usage_once(write_video)