11#include < sstream>
2- extern " C" {
3- #include < libavutil/pixdesc.h>
4- }
52
63#include " src/torchcodec/_core/AVIOTensorContext.h"
74#include " src/torchcodec/_core/Encoder.h"
@@ -582,23 +579,9 @@ VideoEncoder::VideoEncoder(
582579
583580void VideoEncoder::initializeEncoder (
584581 const VideoStreamOptions& videoStreamOptions) {
585- av_log_set_level (AV_LOG_DEBUG);
586-
587- // Always try default
588- // This works for flv (format accepts libx264, but errors)
589- // but fails for avi (should use libx264, but defaults to mpeg4)
590582 const AVCodec* avCodec =
591583 avcodec_find_encoder (avFormatContext_->oformat ->video_codec );
592- // Try libx264 first, then fallback to default ffmpeg
593- // const AVCodec* avCodec = avcodec_find_encoder(AV_CODEC_ID_H264);
594- // if (avCodec == nullptr || avformat_query_codec(avFormatContext_->oformat,
595- // avCodec->id, 0) == 0) {
596- // std::cout << "for " << avFormatContext_->oformat
597- // << ", 264 was unavailable or unsupported! " << std::endl;
598- // avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec);
599- // }
600584 TORCH_CHECK (avCodec != nullptr , " Video codec not found" );
601- std::cout << " Using codec: " << avCodec->name << std::endl;
602585
603586 AVCodecContext* avCodecContext = avcodec_alloc_context3 (avCodec);
604587 TORCH_CHECK (avCodecContext != nullptr , " Couldn't allocate codec context." );
@@ -616,25 +599,15 @@ void VideoEncoder::initializeEncoder(
616599 outWidth_ = videoStreamOptions.width .value_or (inWidth_);
617600 outHeight_ = videoStreamOptions.height .value_or (inHeight_);
618601
619- // Use YUV444P as default output format for lossless encoding
620602 // TODO-VideoEncoder: Enable other pixel formats
621- // outPixelFormat_ = AV_PIX_FMT_YUV444P;
622- // outPixelFormat_ = AV_PIX_FMT_YUV420P;
623-
624- // use first?
625- // outPixelFormat_ = getSupportedPixelFormats(*avCodec)[0];
626-
627603 // Let FFmpeg choose best pixel format to minimize loss
628- int loss = 0 ;
629604 outPixelFormat_ = avcodec_find_best_pix_fmt_of_list (
630- getSupportedPixelFormats (*avCodec), // List of codec- supported formats
605+ getSupportedPixelFormats (*avCodec), // List of supported formats
631606 AV_PIX_FMT_GBRP, // We reorder input to GBRP currently
632607 0 , // No alpha channel
633- &loss // Information about conversion losses
608+ 0 // Discard conversion loss information
634609 );
635610 TORCH_CHECK (outPixelFormat_ != -1 , " Failed to find best pix fmt" )
636- std::cout << " Using pixel format: " << av_get_pix_fmt_name (outPixelFormat_)
637- << std::endl;
638611
639612 // Configure codec parameters
640613 avCodecContext_->codec_id = avCodec->id ;
@@ -645,21 +618,19 @@ void VideoEncoder::initializeEncoder(
645618 avCodecContext_->time_base = {1 , inFrameRate_};
646619 avCodecContext_->framerate = {inFrameRate_, 1 };
647620
648- // Set global header flag for containers that need it (like Matroska)
649- // This populates extradata to enable mkv encoding
650- // https://stackoverflow.com/questions/60278773/invalid-data-when-creating-mkv-container-with-h264-stream-because-extradata-is-n
621+ // Set flag for containers that require extradata to be in the codec context
651622 if (avFormatContext_->oformat ->flags & AVFMT_GLOBALHEADER) {
652623 avCodecContext_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
653624 }
654625
655- // accept optional args
626+ // Apply videoStreamOptions
656627 AVDictionary* options = nullptr ;
657628 if (videoStreamOptions.crf .has_value ()) {
658629 av_dict_set (
659630 &options,
660631 " crf" ,
661- " 0 " ,
662- videoStreamOptions. crf . value ()); // Needed to produce lossless videos
632+ std::to_string (videoStreamOptions. crf . value ()). c_str () ,
633+ 0 );
663634 }
664635 int status = avcodec_open2 (avCodecContext_.get (), avCodec, &options);
665636 av_dict_free (&options);
@@ -684,12 +655,10 @@ void VideoEncoder::initializeEncoder(
684655}
685656
686657void VideoEncoder::encode () {
687- av_log_set_level (AV_LOG_DEBUG);
688658 // To be on the safe side we enforce that encode() can only be called once
689659 TORCH_CHECK (!encodeWasCalled_, " Cannot call encode() twice." );
690660 encodeWasCalled_ = true ;
691661
692- av_dump_format (avFormatContext_.get (), 0 , avFormatContext_->url , 1 );
693662 int status = avformat_write_header (avFormatContext_.get (), nullptr );
694663 TORCH_CHECK (
695664 status == AVSUCCESS,
@@ -810,6 +779,9 @@ void VideoEncoder::encodeFrame(
810779 if (packet->duration == 0 ) {
811780 packet->duration = 1 ;
812781 }
782+ // av_packet_rescale_ts ensures encoded frames have correct timestamps.
783+ // This prevents "no more frames" errors when decoding encoded frames; see:
784+ // https://github.com/pytorch/audio/blob/b6a3368a45aaafe05f1a6a9f10c68adc5e944d9e/src/libtorio/ffmpeg/stream_writer/encoder.cpp#L46
813785 av_packet_rescale_ts (
814786 packet.get (), avCodecContext_->time_base , avStream_->time_base );
815787 packet->stream_index = streamIndex_;
0 commit comments