1
1
#include < sstream>
2
- extern " C" {
3
- #include < libavutil/pixdesc.h>
4
- }
5
2
6
3
#include " src/torchcodec/_core/AVIOTensorContext.h"
7
4
#include " src/torchcodec/_core/Encoder.h"
@@ -582,23 +579,9 @@ VideoEncoder::VideoEncoder(
582
579
583
580
void VideoEncoder::initializeEncoder (
584
581
const VideoStreamOptions& videoStreamOptions) {
585
- av_log_set_level (AV_LOG_DEBUG);
586
-
587
- // Always try default
588
- // This works for flv (format accepts libx264, but errors)
589
- // but fails for avi (should use libx264, but defaults to mpeg4)
590
582
const AVCodec* avCodec =
591
583
avcodec_find_encoder (avFormatContext_->oformat ->video_codec );
592
- // Try libx264 first, then fallback to default ffmpeg
593
- // const AVCodec* avCodec = avcodec_find_encoder(AV_CODEC_ID_H264);
594
- // if (avCodec == nullptr || avformat_query_codec(avFormatContext_->oformat,
595
- // avCodec->id, 0) == 0) {
596
- // std::cout << "for " << avFormatContext_->oformat
597
- // << ", 264 was unavailable or unsupported! " << std::endl;
598
- // avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec);
599
- // }
600
584
TORCH_CHECK (avCodec != nullptr , " Video codec not found" );
601
- std::cout << " Using codec: " << avCodec->name << std::endl;
602
585
603
586
AVCodecContext* avCodecContext = avcodec_alloc_context3 (avCodec);
604
587
TORCH_CHECK (avCodecContext != nullptr , " Couldn't allocate codec context." );
@@ -616,25 +599,15 @@ void VideoEncoder::initializeEncoder(
616
599
outWidth_ = videoStreamOptions.width .value_or (inWidth_);
617
600
outHeight_ = videoStreamOptions.height .value_or (inHeight_);
618
601
619
- // Use YUV444P as default output format for lossless encoding
620
602
// TODO-VideoEncoder: Enable other pixel formats
621
- // outPixelFormat_ = AV_PIX_FMT_YUV444P;
622
- // outPixelFormat_ = AV_PIX_FMT_YUV420P;
623
-
624
- // use first?
625
- // outPixelFormat_ = getSupportedPixelFormats(*avCodec)[0];
626
-
627
603
// Let FFmpeg choose best pixel format to minimize loss
628
- int loss = 0 ;
629
604
outPixelFormat_ = avcodec_find_best_pix_fmt_of_list (
630
- getSupportedPixelFormats (*avCodec), // List of codec- supported formats
605
+ getSupportedPixelFormats (*avCodec), // List of supported formats
631
606
AV_PIX_FMT_GBRP, // We reorder input to GBRP currently
632
607
0 , // No alpha channel
633
- &loss // Information about conversion losses
608
+ 0 // Discard conversion loss information
634
609
);
635
610
TORCH_CHECK (outPixelFormat_ != -1 , " Failed to find best pix fmt" )
636
- std::cout << " Using pixel format: " << av_get_pix_fmt_name (outPixelFormat_)
637
- << std::endl;
638
611
639
612
// Configure codec parameters
640
613
avCodecContext_->codec_id = avCodec->id ;
@@ -645,21 +618,19 @@ void VideoEncoder::initializeEncoder(
645
618
avCodecContext_->time_base = {1 , inFrameRate_};
646
619
avCodecContext_->framerate = {inFrameRate_, 1 };
647
620
648
- // Set global header flag for containers that need it (like Matroska)
649
- // This populates extradata to enable mkv encoding
650
- // https://stackoverflow.com/questions/60278773/invalid-data-when-creating-mkv-container-with-h264-stream-because-extradata-is-n
621
+ // Set flag for containers that require extradata to be in the codec context
651
622
if (avFormatContext_->oformat ->flags & AVFMT_GLOBALHEADER) {
652
623
avCodecContext_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
653
624
}
654
625
655
- // accept optional args
626
+ // Apply videoStreamOptions
656
627
AVDictionary* options = nullptr ;
657
628
if (videoStreamOptions.crf .has_value ()) {
658
629
av_dict_set (
659
630
&options,
660
631
" crf" ,
661
- " 0 " ,
662
- videoStreamOptions. crf . value ()); // Needed to produce lossless videos
632
+ std::to_string (videoStreamOptions. crf . value ()). c_str () ,
633
+ 0 );
663
634
}
664
635
int status = avcodec_open2 (avCodecContext_.get (), avCodec, &options);
665
636
av_dict_free (&options);
@@ -684,12 +655,10 @@ void VideoEncoder::initializeEncoder(
684
655
}
685
656
686
657
void VideoEncoder::encode () {
687
- av_log_set_level (AV_LOG_DEBUG);
688
658
// To be on the safe side we enforce that encode() can only be called once
689
659
TORCH_CHECK (!encodeWasCalled_, " Cannot call encode() twice." );
690
660
encodeWasCalled_ = true ;
691
661
692
- av_dump_format (avFormatContext_.get (), 0 , avFormatContext_->url , 1 );
693
662
int status = avformat_write_header (avFormatContext_.get (), nullptr );
694
663
TORCH_CHECK (
695
664
status == AVSUCCESS,
@@ -810,6 +779,9 @@ void VideoEncoder::encodeFrame(
810
779
if (packet->duration == 0 ) {
811
780
packet->duration = 1 ;
812
781
}
782
+ // av_packet_rescale_ts ensures encoded frames have correct timestamps.
783
+ // This prevents "no more frames" errors when decoding encoded frames,
784
+ // https://github.com/pytorch/audio/blob/b6a3368a45aaafe05f1a6a9f10c68adc5e944d9e/src/libtorio/ffmpeg/stream_writer/encoder.cpp#L46
813
785
av_packet_rescale_ts (
814
786
packet.get (), avCodecContext_->time_base , avStream_->time_base );
815
787
packet->stream_index = streamIndex_;
0 commit comments