diff --git a/src/torchcodec/_core/Metadata.h b/src/torchcodec/_core/Metadata.h index a8f300f49..dcbf7f896 100644 --- a/src/torchcodec/_core/Metadata.h +++ b/src/torchcodec/_core/Metadata.h @@ -25,22 +25,22 @@ struct StreamMetadata { AVMediaType mediaType; std::optional codecId; std::optional codecName; - std::optional durationSeconds; - std::optional beginStreamFromHeader; - std::optional numFrames; + std::optional durationSecondsFromHeader; + std::optional beginStreamSecondsFromHeader; + std::optional numFramesFromHeader; std::optional numKeyFrames; - std::optional averageFps; + std::optional averageFpsFromHeader; std::optional bitRate; // More accurate duration, obtained by scanning the file. // These presentation timestamps are in time base. - std::optional minPtsFromScan; - std::optional maxPtsFromScan; + std::optional beginStreamPtsFromContent; + std::optional endStreamPtsFromContent; // These presentation timestamps are in seconds. - std::optional minPtsSecondsFromScan; - std::optional maxPtsSecondsFromScan; + std::optional beginStreamPtsSecondsFromContent; + std::optional endStreamPtsSecondsFromContent; // This can be useful for index-based seeking. - std::optional numFramesFromScan; + std::optional numFramesFromContent; // Video-only fields derived from the AVCodecContext. std::optional width; @@ -58,7 +58,7 @@ struct ContainerMetadata { int numVideoStreams = 0; // Note that this is the container-level duration, which is usually the max // of all stream durations available in the container. - std::optional durationSeconds; + std::optional durationSecondsFromHeader; // Total BitRate level information at the container level in bit/s std::optional bitRate; // If set, this is the index to the default audio stream. diff --git a/src/torchcodec/_core/SingleStreamDecoder.cpp b/src/torchcodec/_core/SingleStreamDecoder.cpp index f4a285ec6..a66281cdb 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.cpp +++ b/src/torchcodec/_core/SingleStreamDecoder.cpp @@ -125,22 +125,22 @@ void SingleStreamDecoder::initializeDecoder() { int64_t frameCount = avStream->nb_frames; if (frameCount > 0) { - streamMetadata.numFrames = frameCount; + streamMetadata.numFramesFromHeader = frameCount; } if (avStream->duration > 0 && avStream->time_base.den > 0) { - streamMetadata.durationSeconds = + streamMetadata.durationSecondsFromHeader = av_q2d(avStream->time_base) * avStream->duration; } if (avStream->start_time != AV_NOPTS_VALUE) { - streamMetadata.beginStreamFromHeader = + streamMetadata.beginStreamSecondsFromHeader = av_q2d(avStream->time_base) * avStream->start_time; } if (avStream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) { double fps = av_q2d(avStream->r_frame_rate); if (fps > 0) { - streamMetadata.averageFps = fps; + streamMetadata.averageFpsFromHeader = fps; } containerMetadata_.numVideoStreams++; } else if (avStream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { @@ -163,7 +163,7 @@ void SingleStreamDecoder::initializeDecoder() { if (formatContext_->duration > 0) { AVRational defaultTimeBase{1, AV_TIME_BASE}; - containerMetadata_.durationSeconds = + containerMetadata_.durationSecondsFromHeader = ptsToSeconds(formatContext_->duration, defaultTimeBase); } @@ -236,13 +236,14 @@ void SingleStreamDecoder::scanFileAndUpdateMetadataAndIndex() { // record its relevant metadata. int streamIndex = packet->stream_index; auto& streamMetadata = containerMetadata_.allStreamMetadata[streamIndex]; - streamMetadata.minPtsFromScan = std::min( - streamMetadata.minPtsFromScan.value_or(INT64_MAX), getPtsOrDts(packet)); - streamMetadata.maxPtsFromScan = std::max( - streamMetadata.maxPtsFromScan.value_or(INT64_MIN), + streamMetadata.beginStreamPtsFromContent = std::min( + streamMetadata.beginStreamPtsFromContent.value_or(INT64_MAX), + getPtsOrDts(packet)); + streamMetadata.endStreamPtsFromContent = std::max( + streamMetadata.endStreamPtsFromContent.value_or(INT64_MIN), getPtsOrDts(packet) + packet->duration); - streamMetadata.numFramesFromScan = - streamMetadata.numFramesFromScan.value_or(0) + 1; + streamMetadata.numFramesFromContent = + streamMetadata.numFramesFromContent.value_or(0) + 1; // Note that we set the other value in this struct, nextPts, only after // we have scanned all packets and sorted by pts. @@ -262,16 +263,17 @@ void SingleStreamDecoder::scanFileAndUpdateMetadataAndIndex() { auto& streamMetadata = containerMetadata_.allStreamMetadata[streamIndex]; auto avStream = formatContext_->streams[streamIndex]; - streamMetadata.numFramesFromScan = + streamMetadata.numFramesFromContent = streamInfos_[streamIndex].allFrames.size(); - if (streamMetadata.minPtsFromScan.has_value()) { - streamMetadata.minPtsSecondsFromScan = - *streamMetadata.minPtsFromScan * av_q2d(avStream->time_base); + if (streamMetadata.beginStreamPtsFromContent.has_value()) { + streamMetadata.beginStreamPtsSecondsFromContent = + *streamMetadata.beginStreamPtsFromContent * + av_q2d(avStream->time_base); } - if (streamMetadata.maxPtsFromScan.has_value()) { - streamMetadata.maxPtsSecondsFromScan = - *streamMetadata.maxPtsFromScan * av_q2d(avStream->time_base); + if (streamMetadata.endStreamPtsFromContent.has_value()) { + streamMetadata.endStreamPtsSecondsFromContent = + *streamMetadata.endStreamPtsFromContent * av_q2d(avStream->time_base); } } @@ -445,7 +447,7 @@ void SingleStreamDecoder::addVideoStream( containerMetadata_.allStreamMetadata[activeStreamIndex_]; if (seekMode_ == SeekMode::approximate && - !streamMetadata.averageFps.has_value()) { + !streamMetadata.averageFpsFromHeader.has_value()) { throw std::runtime_error( "Seek mode is approximate, but stream " + std::to_string(activeStreamIndex_) + @@ -1422,9 +1424,9 @@ int64_t SingleStreamDecoder::secondsToIndexLowerBound(double seconds) { auto& streamMetadata = containerMetadata_.allStreamMetadata[activeStreamIndex_]; TORCH_CHECK( - streamMetadata.averageFps.has_value(), + streamMetadata.averageFpsFromHeader.has_value(), "Cannot use approximate mode since we couldn't find the average fps from the metadata."); - return std::floor(seconds * streamMetadata.averageFps.value()); + return std::floor(seconds * streamMetadata.averageFpsFromHeader.value()); } default: throw std::runtime_error("Unknown SeekMode"); @@ -1449,9 +1451,9 @@ int64_t SingleStreamDecoder::secondsToIndexUpperBound(double seconds) { auto& streamMetadata = containerMetadata_.allStreamMetadata[activeStreamIndex_]; TORCH_CHECK( - streamMetadata.averageFps.has_value(), + streamMetadata.averageFpsFromHeader.has_value(), "Cannot use approximate mode since we couldn't find the average fps from the metadata."); - return std::ceil(seconds * streamMetadata.averageFps.value()); + return std::ceil(seconds * streamMetadata.averageFpsFromHeader.value()); } default: throw std::runtime_error("Unknown SeekMode"); @@ -1467,10 +1469,11 @@ int64_t SingleStreamDecoder::getPts(int64_t frameIndex) { auto& streamMetadata = containerMetadata_.allStreamMetadata[activeStreamIndex_]; TORCH_CHECK( - streamMetadata.averageFps.has_value(), + streamMetadata.averageFpsFromHeader.has_value(), "Cannot use approximate mode since we couldn't find the average fps from the metadata."); return secondsToClosestPts( - frameIndex / streamMetadata.averageFps.value(), streamInfo.timeBase); + frameIndex / streamMetadata.averageFpsFromHeader.value(), + streamInfo.timeBase); } default: throw std::runtime_error("Unknown SeekMode"); @@ -1485,9 +1488,9 @@ std::optional SingleStreamDecoder::getNumFrames( const StreamMetadata& streamMetadata) { switch (seekMode_) { case SeekMode::exact: - return streamMetadata.numFramesFromScan.value(); + return streamMetadata.numFramesFromContent.value(); case SeekMode::approximate: { - return streamMetadata.numFrames; + return streamMetadata.numFramesFromHeader; } default: throw std::runtime_error("Unknown SeekMode"); @@ -1498,7 +1501,7 @@ double SingleStreamDecoder::getMinSeconds( const StreamMetadata& streamMetadata) { switch (seekMode_) { case SeekMode::exact: - return streamMetadata.minPtsSecondsFromScan.value(); + return streamMetadata.beginStreamPtsSecondsFromContent.value(); case SeekMode::approximate: return 0; default: @@ -1510,9 +1513,9 @@ std::optional SingleStreamDecoder::getMaxSeconds( const StreamMetadata& streamMetadata) { switch (seekMode_) { case SeekMode::exact: - return streamMetadata.maxPtsSecondsFromScan.value(); + return streamMetadata.endStreamPtsSecondsFromContent.value(); case SeekMode::approximate: { - return streamMetadata.durationSeconds; + return streamMetadata.durationSecondsFromHeader; } default: throw std::runtime_error("Unknown SeekMode"); diff --git a/src/torchcodec/_core/SingleStreamDecoder.h b/src/torchcodec/_core/SingleStreamDecoder.h index cf46494c7..dec102d1d 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.h +++ b/src/torchcodec/_core/SingleStreamDecoder.h @@ -121,7 +121,7 @@ class SingleStreamDecoder { // // Valid values for startSeconds and stopSeconds are: // - // [minPtsSecondsFromScan, maxPtsSecondsFromScan) + // [beginStreamPtsSecondsFromContent, endStreamPtsSecondsFromContent) FrameBatchOutput getFramesPlayedInRange( double startSeconds, double stopSeconds); diff --git a/src/torchcodec/_core/_metadata.py b/src/torchcodec/_core/_metadata.py index 58c163669..c15e86e74 100644 --- a/src/torchcodec/_core/_metadata.py +++ b/src/torchcodec/_core/_metadata.py @@ -225,9 +225,11 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata: for stream_index in range(container_dict["numStreams"]): stream_dict = json.loads(_get_stream_json_metadata(decoder, stream_index)) common_meta = dict( - duration_seconds_from_header=stream_dict.get("durationSeconds"), + duration_seconds_from_header=stream_dict.get("durationSecondsFromHeader"), bit_rate=stream_dict.get("bitRate"), - begin_stream_seconds_from_header=stream_dict.get("beginStreamFromHeader"), + begin_stream_seconds_from_header=stream_dict.get( + "beginStreamSecondsFromHeader" + ), codec=stream_dict.get("codec"), stream_index=stream_index, ) @@ -235,16 +237,16 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata: streams_metadata.append( VideoStreamMetadata( begin_stream_seconds_from_content=stream_dict.get( - "minPtsSecondsFromScan" + "beginStreamSecondsFromContent" ), end_stream_seconds_from_content=stream_dict.get( - "maxPtsSecondsFromScan" + "endStreamSecondsFromContent" ), width=stream_dict.get("width"), height=stream_dict.get("height"), - num_frames_from_header=stream_dict.get("numFrames"), - num_frames_from_content=stream_dict.get("numFramesFromScan"), - average_fps_from_header=stream_dict.get("averageFps"), + num_frames_from_header=stream_dict.get("numFramesFromHeader"), + num_frames_from_content=stream_dict.get("numFramesFromContent"), + average_fps_from_header=stream_dict.get("averageFpsFromHeader"), **common_meta, ) ) @@ -264,7 +266,7 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata: streams_metadata.append(StreamMetadata(**common_meta)) return ContainerMetadata( - duration_seconds_from_header=container_dict.get("durationSeconds"), + duration_seconds_from_header=container_dict.get("durationSecondsFromHeader"), bit_rate_from_header=container_dict.get("bitRate"), best_video_stream_index=container_dict.get("bestVideoStreamIndex"), best_audio_stream_index=container_dict.get("bestAudioStreamIndex"), diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp index 4a1c414b0..192abba45 100644 --- a/src/torchcodec/_core/custom_ops.cpp +++ b/src/torchcodec/_core/custom_ops.cpp @@ -456,18 +456,20 @@ std::string get_json_metadata(at::Tensor& decoder) { std::map metadataMap; // serialize the metadata into a string std::stringstream ss; - double durationSeconds = 0; + double durationSecondsFromHeader = 0; if (maybeBestVideoStreamIndex.has_value() && videoMetadata.allStreamMetadata[*maybeBestVideoStreamIndex] - .durationSeconds.has_value()) { - durationSeconds = + .durationSecondsFromHeader.has_value()) { + durationSecondsFromHeader = videoMetadata.allStreamMetadata[*maybeBestVideoStreamIndex] - .durationSeconds.value_or(0); + .durationSecondsFromHeader.value_or(0); } else { // Fallback to container-level duration if stream duration is not found. - durationSeconds = videoMetadata.durationSeconds.value_or(0); + durationSecondsFromHeader = + videoMetadata.durationSecondsFromHeader.value_or(0); } - metadataMap["durationSeconds"] = std::to_string(durationSeconds); + metadataMap["durationSecondsFromHeader"] = + std::to_string(durationSecondsFromHeader); if (videoMetadata.bitRate.has_value()) { metadataMap["bitRate"] = std::to_string(videoMetadata.bitRate.value()); @@ -476,19 +478,20 @@ std::string get_json_metadata(at::Tensor& decoder) { if (maybeBestVideoStreamIndex.has_value()) { auto streamMetadata = videoMetadata.allStreamMetadata[*maybeBestVideoStreamIndex]; - if (streamMetadata.numFramesFromScan.has_value()) { - metadataMap["numFrames"] = - std::to_string(*streamMetadata.numFramesFromScan); - } else if (streamMetadata.numFrames.has_value()) { - metadataMap["numFrames"] = std::to_string(*streamMetadata.numFrames); + if (streamMetadata.numFramesFromContent.has_value()) { + metadataMap["numFramesFromHeader"] = + std::to_string(*streamMetadata.numFramesFromContent); + } else if (streamMetadata.numFramesFromHeader.has_value()) { + metadataMap["numFramesFromHeader"] = + std::to_string(*streamMetadata.numFramesFromHeader); } - if (streamMetadata.minPtsSecondsFromScan.has_value()) { - metadataMap["minPtsSecondsFromScan"] = - std::to_string(*streamMetadata.minPtsSecondsFromScan); + if (streamMetadata.beginStreamPtsSecondsFromContent.has_value()) { + metadataMap["beginStreamSecondsFromContent"] = + std::to_string(*streamMetadata.beginStreamPtsSecondsFromContent); } - if (streamMetadata.maxPtsSecondsFromScan.has_value()) { - metadataMap["maxPtsSecondsFromScan"] = - std::to_string(*streamMetadata.maxPtsSecondsFromScan); + if (streamMetadata.endStreamPtsSecondsFromContent.has_value()) { + metadataMap["endStreamSecondsFromContent"] = + std::to_string(*streamMetadata.endStreamPtsSecondsFromContent); } if (streamMetadata.codecName.has_value()) { metadataMap["codec"] = quoteValue(streamMetadata.codecName.value()); @@ -499,8 +502,9 @@ std::string get_json_metadata(at::Tensor& decoder) { if (streamMetadata.height.has_value()) { metadataMap["height"] = std::to_string(*streamMetadata.height); } - if (streamMetadata.averageFps.has_value()) { - metadataMap["averageFps"] = std::to_string(*streamMetadata.averageFps); + if (streamMetadata.averageFpsFromHeader.has_value()) { + metadataMap["averageFpsFromHeader"] = + std::to_string(*streamMetadata.averageFpsFromHeader); } } if (videoMetadata.bestVideoStreamIndex.has_value()) { @@ -523,8 +527,9 @@ std::string get_container_json_metadata(at::Tensor& decoder) { std::map map; - if (containerMetadata.durationSeconds.has_value()) { - map["durationSeconds"] = std::to_string(*containerMetadata.durationSeconds); + if (containerMetadata.durationSecondsFromHeader.has_value()) { + map["durationSecondsFromHeader"] = + std::to_string(*containerMetadata.durationSecondsFromHeader); } if (containerMetadata.bitRate.has_value()) { @@ -562,30 +567,32 @@ std::string get_stream_json_metadata( std::map map; - if (streamMetadata.durationSeconds.has_value()) { - map["durationSeconds"] = std::to_string(*streamMetadata.durationSeconds); + if (streamMetadata.durationSecondsFromHeader.has_value()) { + map["durationSecondsFromHeader"] = + std::to_string(*streamMetadata.durationSecondsFromHeader); } if (streamMetadata.bitRate.has_value()) { map["bitRate"] = std::to_string(*streamMetadata.bitRate); } - if (streamMetadata.numFramesFromScan.has_value()) { - map["numFramesFromScan"] = - std::to_string(*streamMetadata.numFramesFromScan); + if (streamMetadata.numFramesFromContent.has_value()) { + map["numFramesFromContent"] = + std::to_string(*streamMetadata.numFramesFromContent); } - if (streamMetadata.numFrames.has_value()) { - map["numFrames"] = std::to_string(*streamMetadata.numFrames); + if (streamMetadata.numFramesFromHeader.has_value()) { + map["numFramesFromHeader"] = + std::to_string(*streamMetadata.numFramesFromHeader); } - if (streamMetadata.beginStreamFromHeader.has_value()) { - map["beginStreamFromHeader"] = - std::to_string(*streamMetadata.beginStreamFromHeader); + if (streamMetadata.beginStreamSecondsFromHeader.has_value()) { + map["beginStreamSecondsFromHeader"] = + std::to_string(*streamMetadata.beginStreamSecondsFromHeader); } - if (streamMetadata.minPtsSecondsFromScan.has_value()) { - map["minPtsSecondsFromScan"] = - std::to_string(*streamMetadata.minPtsSecondsFromScan); + if (streamMetadata.beginStreamPtsSecondsFromContent.has_value()) { + map["beginStreamSecondsFromContent"] = + std::to_string(*streamMetadata.beginStreamPtsSecondsFromContent); } - if (streamMetadata.maxPtsSecondsFromScan.has_value()) { - map["maxPtsSecondsFromScan"] = - std::to_string(*streamMetadata.maxPtsSecondsFromScan); + if (streamMetadata.endStreamPtsSecondsFromContent.has_value()) { + map["endStreamSecondsFromContent"] = + std::to_string(*streamMetadata.endStreamPtsSecondsFromContent); } if (streamMetadata.codecName.has_value()) { map["codec"] = quoteValue(streamMetadata.codecName.value()); @@ -596,8 +603,9 @@ std::string get_stream_json_metadata( if (streamMetadata.height.has_value()) { map["height"] = std::to_string(*streamMetadata.height); } - if (streamMetadata.averageFps.has_value()) { - map["averageFps"] = std::to_string(*streamMetadata.averageFps); + if (streamMetadata.averageFpsFromHeader.has_value()) { + map["averageFpsFromHeader"] = + std::to_string(*streamMetadata.averageFpsFromHeader); } if (streamMetadata.sampleRate.has_value()) { map["sampleRate"] = std::to_string(*streamMetadata.sampleRate); diff --git a/src/torchcodec/_samplers/video_clip_sampler.py b/src/torchcodec/_samplers/video_clip_sampler.py index 8a92400f2..664a1b6c2 100644 --- a/src/torchcodec/_samplers/video_clip_sampler.py +++ b/src/torchcodec/_samplers/video_clip_sampler.py @@ -213,7 +213,7 @@ def _get_clips_for_index_based_sampling( sample_end_index = ( min( index_based_sampler_args.sample_end_index + 1, - metadata_json["numFrames"], + metadata_json["numFramesFromHeader"], ) - index_based_sampler_args.video_frame_dilation * index_based_sampler_args.frames_per_clip @@ -263,26 +263,26 @@ def _get_start_seconds( Returns: (`List[float]`): List of the sampled clip start position in seconds """ - video_duration_in_seconds = metadata_json["durationSeconds"] + video_duration_in_seconds = metadata_json["durationSecondsFromHeader"] clip_duration_in_seconds = ( time_based_sampler_args.frames_per_clip * time_based_sampler_args.video_frame_dilation + 1 - ) / metadata_json["averageFps"] + ) / metadata_json["averageFpsFromHeader"] - minPtsSecondsFromScan = ( - metadata_json["minPtsSecondsFromScan"] - if metadata_json["minPtsSecondsFromScan"] + beginStreamSecondsFromContent = ( + metadata_json["beginStreamSecondsFromContent"] + if metadata_json["beginStreamSecondsFromContent"] else 0 ) - maxPtsSecondsFromScan = ( - metadata_json["maxPtsSecondsFromScan"] - if metadata_json["maxPtsSecondsFromScan"] > 0 + endStreamSecondsFromContent = ( + metadata_json["endStreamSecondsFromContent"] + if metadata_json["endStreamSecondsFromContent"] > 0 else video_duration_in_seconds ) last_possible_clip_start_in_seconds = ( - maxPtsSecondsFromScan - clip_duration_in_seconds + endStreamSecondsFromContent - clip_duration_in_seconds ) if last_possible_clip_start_in_seconds < 0: raise VideoTooShortException( @@ -292,7 +292,7 @@ def _get_start_seconds( clip_starts_in_seconds: List[float] = [] sample_start_second = max( time_based_sampler_args.sample_start_second, - minPtsSecondsFromScan, + beginStreamSecondsFromContent, ) sample_end_second = min( last_possible_clip_start_in_seconds, diff --git a/test/VideoDecoderTest.cpp b/test/VideoDecoderTest.cpp index a30609c2a..fd9f2535d 100644 --- a/test/VideoDecoderTest.cpp +++ b/test/VideoDecoderTest.cpp @@ -80,19 +80,19 @@ TEST_P(SingleStreamDecoderTest, ReturnsFpsAndDurationForVideoInMetadata) { const auto& videoStream = metadata.allStreamMetadata[3]; EXPECT_EQ(videoStream.mediaType, AVMEDIA_TYPE_VIDEO); EXPECT_EQ(videoStream.codecName, "h264"); - EXPECT_NEAR(*videoStream.averageFps, 29.97f, 1e-1); + EXPECT_NEAR(*videoStream.averageFpsFromHeader, 29.97f, 1e-1); EXPECT_NEAR(*videoStream.bitRate, 128783, 1e-1); - EXPECT_NEAR(*videoStream.durationSeconds, 13.013, 1e-1); - EXPECT_EQ(videoStream.numFrames, 390); - EXPECT_FALSE(videoStream.minPtsSecondsFromScan.has_value()); - EXPECT_FALSE(videoStream.maxPtsSecondsFromScan.has_value()); - EXPECT_FALSE(videoStream.numFramesFromScan.has_value()); + EXPECT_NEAR(*videoStream.durationSecondsFromHeader, 13.013, 1e-1); + EXPECT_EQ(videoStream.numFramesFromHeader, 390); + EXPECT_FALSE(videoStream.beginStreamPtsSecondsFromContent.has_value()); + EXPECT_FALSE(videoStream.endStreamPtsSecondsFromContent.has_value()); + EXPECT_FALSE(videoStream.numFramesFromContent.has_value()); decoder->scanFileAndUpdateMetadataAndIndex(); metadata = decoder->getContainerMetadata(); const auto& videoStream1 = metadata.allStreamMetadata[3]; - EXPECT_EQ(*videoStream1.minPtsSecondsFromScan, 0); - EXPECT_EQ(*videoStream1.maxPtsSecondsFromScan, 13.013); - EXPECT_EQ(*videoStream1.numFramesFromScan, 390); + EXPECT_EQ(*videoStream1.beginStreamPtsSecondsFromContent, 0); + EXPECT_EQ(*videoStream1.endStreamPtsSecondsFromContent, 13.013); + EXPECT_EQ(*videoStream1.numFramesFromContent, 390); } TEST(SingleStreamDecoderTest, MissingVideoFileThrowsException) { @@ -434,7 +434,7 @@ TEST_P(SingleStreamDecoderTest, GetAudioMetadata) { const auto& audioStream = metadata.allStreamMetadata[0]; EXPECT_EQ(audioStream.mediaType, AVMEDIA_TYPE_AUDIO); - EXPECT_NEAR(*audioStream.durationSeconds, 13.25, 1e-1); + EXPECT_NEAR(*audioStream.durationSecondsFromHeader, 13.25, 1e-1); } INSTANTIATE_TEST_SUITE_P( diff --git a/test/test_ops.py b/test/test_ops.py index 77be702b6..750124835 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -215,7 +215,7 @@ def test_pts_apis_against_index_ref(self, device): metadata = get_json_metadata(decoder) metadata_dict = json.loads(metadata) - num_frames = metadata_dict["numFrames"] + num_frames = metadata_dict["numFramesFromHeader"] assert num_frames == 390 _, all_pts_seconds_ref, _ = zip( @@ -395,9 +395,11 @@ def test_video_get_json_metadata(self): metadata_dict = json.loads(metadata) # We should be able to see all of this metadata without adding a video stream - assert metadata_dict["durationSeconds"] == pytest.approx(13.013, abs=0.001) - assert metadata_dict["numFrames"] == 390 - assert metadata_dict["averageFps"] == pytest.approx(29.97, abs=0.001) + assert metadata_dict["durationSecondsFromHeader"] == pytest.approx( + 13.013, abs=0.001 + ) + assert metadata_dict["numFramesFromHeader"] == 390 + assert metadata_dict["averageFpsFromHeader"] == pytest.approx(29.97, abs=0.001) assert metadata_dict["codec"] == "h264" ffmpeg_dict = get_ffmpeg_library_versions() if ffmpeg_dict["libavformat"][0] >= 60: @@ -412,8 +414,8 @@ def test_video_get_json_metadata_with_stream(self): metadata_dict = json.loads(metadata) assert metadata_dict["width"] == 480 assert metadata_dict["height"] == 270 - assert metadata_dict["minPtsSecondsFromScan"] == 0 - assert metadata_dict["maxPtsSecondsFromScan"] == 13.013 + assert metadata_dict["beginStreamSecondsFromContent"] == 0 + assert metadata_dict["endStreamSecondsFromContent"] == 13.013 def test_get_ffmpeg_version(self): ffmpeg_dict = get_ffmpeg_library_versions()