Skip to content

Commit 7bfefad

Browse files
committed
Merge branch 'main' of github.com:pytorch/torchcodec into header_again
2 parents be1fb02 + eaf3dd3 commit 7bfefad

File tree

4 files changed

+68
-27
lines changed

4 files changed

+68
-27
lines changed

.github/workflows/linux_cuda_wheel.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ jobs:
6767
# include more python versions.
6868
python-version: ['3.9']
6969
cuda-version: ['11.8', '12.4', '12.6']
70-
ffmpeg-version-for-tests: ['5', '6', '7']
70+
# TODO: put back ffmpeg 5 https://github.com/pytorch/torchcodec/issues/325
71+
ffmpeg-version-for-tests: ['6', '7']
7172
container:
7273
image: "pytorch/manylinux2_28-builder:cuda${{ matrix.cuda-version }}"
7374
options: "--gpus all -e NVIDIA_DRIVER_CAPABILITIES=video,compute,utility"

src/torchcodec/decoders/_core/FFMPEGCommon.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ class AutoAVPacket {
9292

9393
public:
9494
AutoAVPacket();
95+
AutoAVPacket(const AutoAVPacket& other) = delete;
96+
AutoAVPacket& operator=(const AutoAVPacket& other) = delete;
9597
~AutoAVPacket();
9698
};
9799

@@ -100,7 +102,9 @@ class ReferenceAVPacket {
100102
AVPacket* avPacket_;
101103

102104
public:
103-
ReferenceAVPacket(AutoAVPacket& shared);
105+
explicit ReferenceAVPacket(AutoAVPacket& shared);
106+
ReferenceAVPacket(const ReferenceAVPacket& other) = delete;
107+
ReferenceAVPacket& operator=(const ReferenceAVPacket& other) = delete;
104108
~ReferenceAVPacket();
105109
AVPacket* get();
106110
AVPacket* operator->();

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -804,16 +804,20 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
804804
}
805805
}
806806

807-
VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
807+
VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
808808
std::function<bool(int, AVFrame*)> filterFunction) {
809809
if (activeStreamIndices_.size() == 0) {
810810
throw std::runtime_error("No active streams configured.");
811811
}
812+
812813
resetDecodeStats();
814+
815+
// Seek if needed.
813816
if (desiredPtsSeconds_.has_value()) {
814817
maybeSeekToBeforeDesiredPts();
815818
desiredPtsSeconds_ = std::nullopt;
816819
}
820+
817821
// Need to get the next frame or error from PopFrame.
818822
UniqueAVFrame avFrame(av_frame_alloc());
819823
AutoAVPacket autoAVPacket;
@@ -823,42 +827,58 @@ VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
823827
while (true) {
824828
frameStreamIndex = -1;
825829
bool gotPermanentErrorOnAnyActiveStream = false;
830+
831+
// Get a frame on an active stream. Note that we don't know ahead of time
832+
// which streams have frames to receive, so we linearly try the active
833+
// streams.
826834
for (int streamIndex : activeStreamIndices_) {
827835
StreamInfo& streamInfo = streamInfos_[streamIndex];
828836
ffmpegStatus =
829837
avcodec_receive_frame(streamInfo.codecContext.get(), avFrame.get());
830-
bool gotNonRetriableError =
831-
ffmpegStatus != AVSUCCESS && ffmpegStatus != AVERROR(EAGAIN);
832-
if (gotNonRetriableError) {
838+
839+
if (ffmpegStatus != AVSUCCESS && ffmpegStatus != AVERROR(EAGAIN)) {
833840
gotPermanentErrorOnAnyActiveStream = true;
834841
break;
835842
}
843+
836844
if (ffmpegStatus == AVSUCCESS) {
837845
frameStreamIndex = streamIndex;
838846
break;
839847
}
840848
}
849+
841850
if (gotPermanentErrorOnAnyActiveStream) {
842851
break;
843852
}
853+
844854
decodeStats_.numFramesReceivedByDecoder++;
845-
bool gotNeededFrame = ffmpegStatus == AVSUCCESS &&
846-
filterFunction(frameStreamIndex, avFrame.get());
847-
if (gotNeededFrame) {
855+
856+
// Is this the kind of frame we're looking for?
857+
if (ffmpegStatus == AVSUCCESS &&
858+
filterFunction(frameStreamIndex, avFrame.get())) {
859+
// Yes, this is the frame we'll return; break out of the decoding loop.
848860
break;
849861
} else if (ffmpegStatus == AVSUCCESS) {
850-
// No need to send more packets here as the decoder may have frames in
851-
// its buffer.
862+
// No, but we received a valid frame - just not the kind we're looking
863+
// for. The logic below will read packets and send them to the decoder.
864+
// But since we did just receive a frame, we should skip reading more
865+
// packets and sending them to the decoder and just try to receive more
866+
// frames from the decoder.
852867
continue;
853868
}
869+
854870
if (reachedEOF) {
855871
// We don't have any more packets to send to the decoder. So keep on
856872
// pulling frames from its internal buffers.
857873
continue;
858874
}
875+
876+
// We still haven't found the frame we're looking for. So let's read more
877+
// packets and send them to the decoder.
859878
ReferenceAVPacket packet(autoAVPacket);
860879
ffmpegStatus = av_read_frame(formatContext_.get(), packet.get());
861880
decodeStats_.numPacketsRead++;
881+
862882
if (ffmpegStatus == AVERROR_EOF) {
863883
// End of file reached. We must drain all codecs by sending a nullptr
864884
// packet.
@@ -873,27 +893,38 @@ VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
873893
getFFMPEGErrorStringFromErrorCode(ffmpegStatus));
874894
}
875895
}
896+
897+
// We've reached the end of file so we can't read any more packets from
898+
// it, but the decoder may still have frames to read in its buffer.
899+
// Continue iterating to try reading frames.
876900
reachedEOF = true;
877901
continue;
878902
}
903+
879904
if (ffmpegStatus < AVSUCCESS) {
880905
throw std::runtime_error(
881906
"Could not read frame from input file: " +
882907
getFFMPEGErrorStringFromErrorCode(ffmpegStatus));
883908
}
909+
884910
if (activeStreamIndices_.count(packet->stream_index) == 0) {
885911
// This packet is not for any of the active streams.
886912
continue;
887913
}
914+
915+
// We got a valid packet. Send it to the decoder, and we'll try to receive
916+
// the decoded frame in the next iteration.
888917
ffmpegStatus = avcodec_send_packet(
889918
streamInfos_[packet->stream_index].codecContext.get(), packet.get());
890919
if (ffmpegStatus < AVSUCCESS) {
891920
throw std::runtime_error(
892921
"Could not push packet to decoder: " +
893922
getFFMPEGErrorStringFromErrorCode(ffmpegStatus));
894923
}
924+
895925
decodeStats_.numPacketsSentToDecoder++;
896926
}
927+
897928
if (ffmpegStatus < AVSUCCESS) {
898929
if (reachedEOF || ffmpegStatus == AVERROR_EOF) {
899930
throw VideoDecoder::EndOfFileException(
@@ -904,6 +935,7 @@ VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
904935
"Could not receive frame from decoder: " +
905936
getFFMPEGErrorStringFromErrorCode(ffmpegStatus));
906937
}
938+
907939
// Note that we don't flush the decoder when we reach EOF (even though that's
908940
// mentioned in https://ffmpeg.org/doxygen/trunk/group__lavc__encdec.html).
909941
// This is because we may have packets internally in the decoder that we
@@ -913,10 +945,8 @@ VideoDecoder::AVFrameStream VideoDecoder::getAVFrameUsingFilterFunction(
913945
StreamInfo& activeStreamInfo = streamInfos_[frameStreamIndex];
914946
activeStreamInfo.currentPts = avFrame->pts;
915947
activeStreamInfo.currentDuration = getDuration(avFrame);
916-
AVFrameStream avFrameStream;
917-
avFrameStream.streamIndex = frameStreamIndex;
918-
avFrameStream.avFrame = std::move(avFrame);
919-
return avFrameStream;
948+
949+
return AVFrameStream(std::move(avFrame), frameStreamIndex);
920950
}
921951

922952
VideoDecoder::FrameOutput VideoDecoder::convertAVFrameToFrameOutput(
@@ -1080,8 +1110,8 @@ VideoDecoder::FrameOutput VideoDecoder::getFramePlayedAtNoDemux(
10801110
}
10811111

10821112
setCursorPtsInSeconds(seconds);
1083-
AVFrameStream avFrameStream = getAVFrameUsingFilterFunction(
1084-
[seconds, this](int frameStreamIndex, AVFrame* avFrame) {
1113+
AVFrameStream avFrameStream =
1114+
decodeAVFrame([seconds, this](int frameStreamIndex, AVFrame* avFrame) {
10851115
StreamInfo& streamInfo = streamInfos_[frameStreamIndex];
10861116
double frameStartTime = ptsToSeconds(avFrame->pts, streamInfo.timeBase);
10871117
double frameEndTime = ptsToSeconds(
@@ -1481,8 +1511,8 @@ VideoDecoder::FrameOutput VideoDecoder::getNextFrameNoDemux() {
14811511

14821512
VideoDecoder::FrameOutput VideoDecoder::getNextFrameNoDemuxInternal(
14831513
std::optional<torch::Tensor> preAllocatedOutputTensor) {
1484-
AVFrameStream avFrameStream = getAVFrameUsingFilterFunction(
1485-
[this](int frameStreamIndex, AVFrame* avFrame) {
1514+
AVFrameStream avFrameStream =
1515+
decodeAVFrame([this](int frameStreamIndex, AVFrame* avFrame) {
14861516
StreamInfo& activeStreamInfo = streamInfos_[frameStreamIndex];
14871517
return avFrame->pts >= activeStreamInfo.discardFramesBeforePts;
14881518
});

src/torchcodec/decoders/_core/VideoDecoder.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ class VideoDecoder {
2929

3030
enum class SeekMode { exact, approximate };
3131

32+
explicit VideoDecoder(const std::string& videoFilePath, SeekMode seekMode);
33+
explicit VideoDecoder(const void* buffer, size_t length, SeekMode seekMode);
34+
3235
// Creates a VideoDecoder from the video at videoFilePath.
3336
static std::unique_ptr<VideoDecoder> createFromFilePath(
3437
const std::string& videoFilePath,
@@ -247,6 +250,9 @@ class VideoDecoder {
247250
UniqueAVFrame avFrame;
248251
// The stream index of the decoded frame.
249252
int streamIndex;
253+
254+
explicit AVFrameStream(UniqueAVFrame&& a, int s)
255+
: avFrame(std::move(a)), streamIndex(s) {}
250256
};
251257

252258
// Once getFrameAtIndex supports the preAllocatedOutputTensor parameter, we
@@ -278,6 +284,7 @@ class VideoDecoder {
278284
// --------------------------------------------------------------------------
279285
// STREAMINFO AND ASSOCIATED STRUCTS
280286
// --------------------------------------------------------------------------
287+
281288
struct FrameInfo {
282289
int64_t pts = 0;
283290
// The value of this default is important: the last frame's nextPts will be
@@ -326,10 +333,10 @@ class VideoDecoder {
326333
int64_t discardFramesBeforePts = INT64_MIN;
327334
VideoStreamOptions videoStreamOptions;
328335

329-
// color-conversion fields. Only one of FilterGraphContextr and
336+
// color-conversion fields. Only one of FilterGraphContext and
330337
// UniqueSwsContext should be non-null.
331-
ColorConversionLibrary colorConversionLibrary = FILTERGRAPH;
332338
FilterGraphContext filterGraphContext;
339+
ColorConversionLibrary colorConversionLibrary = FILTERGRAPH;
333340
UniqueSwsContext swsContext;
334341

335342
// Used to know whether a new FilterGraphContext or UniqueSwsContext should
@@ -338,12 +345,9 @@ class VideoDecoder {
338345
};
339346

340347
// --------------------------------------------------------------------------
341-
// CONSTRUCTORS AND INITIALIZERS
348+
// INITIALIZERS
342349
// --------------------------------------------------------------------------
343-
// Don't use those, use the static methods to create a decoder object.
344350

345-
explicit VideoDecoder(const std::string& videoFilePath, SeekMode seekMode);
346-
explicit VideoDecoder(const void* buffer, size_t length, SeekMode seekMode);
347351
void initializeDecoder();
348352
void updateMetadataWithCodecContext(
349353
int streamIndex,
@@ -360,8 +364,8 @@ class VideoDecoder {
360364

361365
void maybeSeekToBeforeDesiredPts();
362366

363-
AVFrameStream getAVFrameUsingFilterFunction(
364-
std::function<bool(int, AVFrame*)>);
367+
AVFrameStream decodeAVFrame(
368+
std::function<bool(int, AVFrame*)> filterFunction);
365369

366370
FrameOutput getNextFrameNoDemuxInternal(
367371
std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt);
@@ -435,6 +439,8 @@ class VideoDecoder {
435439
// STREAM AND METADATA APIS
436440
// --------------------------------------------------------------------------
437441

442+
void populateVideoMetadataFromStreamIndex(int streamIndex);
443+
438444
// Returns the "best" stream index for a given media type. The "best" is
439445
// determined by various heuristics in FFMPEG.
440446
// See

0 commit comments

Comments
 (0)