-
Notifications
You must be signed in to change notification settings - Fork 86
Add FPS to get_frames_played_in_range()
#1148
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -844,7 +844,8 @@ FrameBatchOutput SingleStreamDecoder::getFramesPlayedAt( | |
|
|
||
| FrameBatchOutput SingleStreamDecoder::getFramesPlayedInRange( | ||
| double startSeconds, | ||
| double stopSeconds) { | ||
| double stopSeconds, | ||
| std::optional<double> fps) { | ||
| validateActiveStream(AVMEDIA_TYPE_VIDEO); | ||
| const auto& streamMetadata = | ||
| containerMetadata_.allStreamMetadata[activeStreamIndex_]; | ||
|
|
@@ -906,6 +907,60 @@ FrameBatchOutput SingleStreamDecoder::getFramesPlayedInRange( | |
| std::to_string(maxSeconds.value()) + ")."); | ||
| } | ||
|
|
||
| // Resample frames to match the target frame rate | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could add an early break if requested
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's handle that with a follow-up. Because we'll need to do equality on a double, there may be some subtle things to handle. |
||
| if (fps.has_value()) { | ||
| TORCH_CHECK( | ||
| fps.value() > 0, | ||
| "fps must be positive, got " + std::to_string(fps.value())); | ||
|
|
||
| // TODO: add an early break if requested fps is the same as the current fps | ||
|
|
||
| double fpsVal = fps.value(); | ||
| double frameDuration = 1.0 / fpsVal; | ||
|
|
||
| double product = (stopSeconds - startSeconds) * fpsVal; | ||
| int64_t numOutputFrames = static_cast<int64_t>(std::round(product)); | ||
|
|
||
| // Generate target timestamps and find source frame indices | ||
| std::vector<int64_t> sourceFrameIndices(numOutputFrames); | ||
| std::vector<double> targetTimestamps(numOutputFrames); | ||
| for (int64_t i = 0; i < numOutputFrames; ++i) { | ||
| targetTimestamps[i] = startSeconds + i * frameDuration; | ||
| sourceFrameIndices[i] = secondsToIndexLowerBound(targetTimestamps[i]); | ||
| } | ||
|
|
||
| FrameBatchOutput frameBatchOutput( | ||
| numOutputFrames, | ||
| resizedOutputDims_.value_or(metadataDims_), | ||
| videoStreamOptions.device); | ||
|
|
||
| // Decode frames, reusing already-decoded frames for duplicates | ||
| int64_t lastDecodedSourceIndex = -1; | ||
| torch::Tensor lastDecodedData; | ||
|
|
||
| for (int64_t i = 0; i < numOutputFrames; ++i) { | ||
| int64_t sourceIdx = sourceFrameIndices[i]; | ||
|
|
||
| if (sourceIdx == lastDecodedSourceIndex && lastDecodedSourceIndex >= 0) { | ||
| frameBatchOutput.data[i].copy_(lastDecodedData); | ||
| } else { | ||
| FrameOutput frameOutput = | ||
| getFrameAtIndexInternal(sourceIdx, frameBatchOutput.data[i]); | ||
| lastDecodedData = frameBatchOutput.data[i]; | ||
| lastDecodedSourceIndex = sourceIdx; | ||
| } | ||
|
|
||
| frameBatchOutput.ptsSeconds[i] = targetTimestamps[i]; | ||
| frameBatchOutput.durationSeconds[i] = frameDuration; | ||
| } | ||
|
|
||
| frameBatchOutput.data = maybePermuteHWC2CHW(frameBatchOutput.data); | ||
| return frameBatchOutput; | ||
| } | ||
|
|
||
| // Original behavior when fps is not specified: | ||
| // Return all frames in range at source fps | ||
|
|
||
| // Note that we look at nextPts for a frame, and not its pts or duration. | ||
| // Our abstract player displays frames starting at the pts for that frame | ||
| // until the pts for the next frame. There are two consequences: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,7 +15,6 @@ | |
|
|
||
| import torch | ||
| from torch import device as torch_device, nn, Tensor | ||
|
|
||
| from torchcodec import _core as core, Frame, FrameBatch | ||
| from torchcodec.decoders._decoder_utils import ( | ||
| _get_cuda_backend, | ||
|
|
@@ -452,32 +451,31 @@ def get_frames_played_at(self, seconds: torch.Tensor | list[float]) -> FrameBatc | |
| ) | ||
|
|
||
| def get_frames_played_in_range( | ||
| self, start_seconds: float, stop_seconds: float | ||
| self, start_seconds: float, stop_seconds: float, fps: float | None = None | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we expose the fps functionality in this method, then for convenience I think we should make sure to also expose a |
||
| ) -> FrameBatch: | ||
| """Returns multiple frames in the given range. | ||
|
|
||
| Frames are in the half open range [start_seconds, stop_seconds). Each | ||
| returned frame's :term:`pts`, in seconds, is inside of the half open | ||
| range. | ||
|
|
||
| Args: | ||
| start_seconds (float): Time, in seconds, of the start of the | ||
| range. | ||
| stop_seconds (float): Time, in seconds, of the end of the | ||
| range. As a half open range, the end is excluded. | ||
| start_seconds (float): Time, in seconds, of the start of the range. | ||
| stop_seconds (float): Time, in seconds, of the end of the range. | ||
| As a half open range, the end is excluded. | ||
| fps (float, optional): If specified, resample output to this frame | ||
| rate by duplicating or dropping frames as necessary. If None | ||
| (default), returns frames at the source video's frame rate. | ||
|
|
||
| Returns: | ||
| FrameBatch: The frames within the specified range. | ||
| """ | ||
| if not start_seconds <= stop_seconds: | ||
| raise ValueError( | ||
| f"Invalid start seconds: {start_seconds}. It must be less than or equal to stop seconds ({stop_seconds})." | ||
| f"Invalid start seconds: {start_seconds}. " | ||
| f"It must be less than or equal to stop seconds ({stop_seconds})." | ||
| ) | ||
| if not self._begin_stream_seconds <= start_seconds < self._end_stream_seconds: | ||
| raise ValueError( | ||
| f"Invalid start seconds: {start_seconds}. " | ||
| f"It must be greater than or equal to {self._begin_stream_seconds} " | ||
| f"and less than or equal to {self._end_stream_seconds}." | ||
| f"and less than {self._end_stream_seconds}." | ||
| ) | ||
| if not stop_seconds <= self._end_stream_seconds: | ||
| raise ValueError( | ||
|
|
@@ -488,9 +486,27 @@ def get_frames_played_in_range( | |
| self._decoder, | ||
| start_seconds=start_seconds, | ||
| stop_seconds=stop_seconds, | ||
| fps=fps, | ||
| ) | ||
| return FrameBatch(*frames) | ||
|
|
||
| def get_all_frames(self, fps: float | None = None) -> FrameBatch: | ||
| """Returns all frames in the video. | ||
|
|
||
| Args: | ||
| fps (float, optional): If specified, resample output to this frame | ||
| rate by duplicating or dropping frames as necessary. If None | ||
| (default), returns frames at the source video's frame rate. | ||
|
|
||
| Returns: | ||
| FrameBatch: All frames in the video. | ||
| """ | ||
| return self.get_frames_played_in_range( | ||
| start_seconds=self._begin_stream_seconds, | ||
| stop_seconds=self._end_stream_seconds, | ||
| fps=fps, | ||
| ) | ||
|
|
||
|
|
||
| def _get_and_validate_stream_metadata( | ||
| *, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have a slight preference for
SingleStreamDecoderto not take optionals, and for the bridge code incustom_ops.cppto take care of optional logic. But, there are other APIs inSingleStreamDecoderwhich do take optionals, I'll invite @Dan-Flores and @NicolasHug to make a call here.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems natural to me to have an optional here. Letting
custom_ops.cppbe the bridge, I think, would mean that we'd have to create more private method to split the implementation (for e.g. the common checks at the beginning, and the common logic at the end).