88import json
99import numbers
1010from pathlib import Path
11- from typing import List , Literal , Optional , Sequence , Tuple , Union
11+ from typing import Literal , Optional , Tuple , Union
1212
1313import torch
14- from torch import device as torch_device , nn , Tensor
14+ from torch import device as torch_device , Tensor
1515
1616from torchcodec import _core as core , Frame , FrameBatch
1717from torchcodec .decoders ._decoder_utils import (
1818 _get_cuda_backend ,
1919 create_decoder ,
2020 ERROR_REPORTING_INSTRUCTIONS ,
2121)
22- from torchcodec .transforms import DecoderTransform , Resize
2322
2423
2524class VideoDecoder :
@@ -68,11 +67,6 @@ class VideoDecoder:
6867 probably is. Default: "exact".
6968 Read more about this parameter in:
7069 :ref:`sphx_glr_generated_examples_decoding_approximate_mode.py`
71- transforms (sequence of transform objects, optional): Sequence of transforms to be
72- applied to the decoded frames by the decoder itself, in order. Accepts both
73- :class:`~torchcodec.transforms.DecoderTransform` and
74- :class:`~torchvision.transforms.v2.Transform`
75- objects. Read more about this parameter in: TODO_DECODER_TRANSFORMS_TUTORIAL.
7670 custom_frame_mappings (str, bytes, or file-like object, optional):
7771 Mapping of frames to their metadata, typically generated via ffprobe.
7872 This enables accurate frame seeking without requiring a full video scan.
@@ -111,7 +105,6 @@ def __init__(
111105 num_ffmpeg_threads : int = 1 ,
112106 device : Optional [Union [str , torch_device ]] = None ,
113107 seek_mode : Literal ["exact" , "approximate" ] = "exact" ,
114- transforms : Optional [Sequence [Union [DecoderTransform , nn .Module ]]] = None ,
115108 custom_frame_mappings : Optional [
116109 Union [str , bytes , io .RawIOBase , io .BufferedReader ]
117110 ] = None ,
@@ -167,7 +160,6 @@ def __init__(
167160 device = str (device )
168161
169162 device_variant = _get_cuda_backend ()
170- transform_specs = _make_transform_specs (transforms )
171163
172164 core .add_video_stream (
173165 self ._decoder ,
@@ -176,7 +168,6 @@ def __init__(
176168 num_threads = num_ffmpeg_threads ,
177169 device = device ,
178170 device_variant = device_variant ,
179- transform_specs = transform_specs ,
180171 custom_frame_mappings = custom_frame_mappings_data ,
181172 )
182173
@@ -448,78 +439,6 @@ def _get_and_validate_stream_metadata(
448439 )
449440
450441
451- def _convert_to_decoder_transforms (
452- transforms : Sequence [Union [DecoderTransform , nn .Module ]],
453- ) -> List [DecoderTransform ]:
454- """Convert a sequence of transforms that may contain TorchVision transform
455- objects into a list of only TorchCodec transform objects.
456-
457- Args:
458- transforms: Sequence of transform objects. The objects can be one of two
459- types:
460- 1. torchcodec.transforms.DecoderTransform
461- 2. torchvision.transforms.v2.Transform, but our type annotation
462- only mentions its base, nn.Module. We don't want to take a
463- hard dependency on TorchVision.
464-
465- Returns:
466- List of DecoderTransform objects.
467- """
468- try :
469- from torchvision .transforms import v2
470-
471- tv_available = True
472- except ImportError :
473- tv_available = False
474-
475- converted_transforms : list [DecoderTransform ] = []
476- for transform in transforms :
477- if not isinstance (transform , DecoderTransform ):
478- if not tv_available :
479- raise ValueError (
480- f"The supplied transform, { transform } , is not a TorchCodec "
481- " DecoderTransform. TorchCodec also accept TorchVision "
482- "v2 transforms, but TorchVision is not installed."
483- )
484- elif isinstance (transform , v2 .Resize ):
485- converted_transforms .append (Resize ._from_torchvision (transform ))
486- else :
487- raise ValueError (
488- f"Unsupported transform: { transform } . Transforms must be "
489- "either a TorchCodec DecoderTransform or a TorchVision "
490- "v2 transform."
491- )
492- else :
493- converted_transforms .append (transform )
494-
495- return converted_transforms
496-
497-
498- def _make_transform_specs (
499- transforms : Optional [Sequence [Union [DecoderTransform , nn .Module ]]],
500- ) -> str :
501- """Given a sequence of transforms, turn those into the specification string
502- the core API expects.
503-
504- Args:
505- transforms: Optional sequence of transform objects. The objects can be
506- one of two types:
507- 1. torchcodec.transforms.DecoderTransform
508- 2. torchvision.transforms.v2.Transform, but our type annotation
509- only mentions its base, nn.Module. We don't want to take a
510- hard dependency on TorchVision.
511-
512- Returns:
513- String of transforms in the format the core API expects: transform
514- specifications separated by semicolons.
515- """
516- if transforms is None :
517- return ""
518-
519- transforms = _convert_to_decoder_transforms (transforms )
520- return ";" .join ([t ._make_transform_spec () for t in transforms ])
521-
522-
523442def _read_custom_frame_mappings (
524443 custom_frame_mappings : Union [str , bytes , io .RawIOBase , io .BufferedReader ]
525444) -> tuple [Tensor , Tensor , Tensor ]:
0 commit comments