@@ -405,6 +405,58 @@ VideoDecoder::VideoStreamOptions::VideoStreamOptions(
405405 }
406406}
407407
408+ void print_codecContext (AVCodecContext* cc) {
409+ printf (" Codec ID: %d\n " , cc->codec_id );
410+ printf (" Codec Type: %d\n " , cc->codec_type );
411+ printf (" Codec Name: %s\n " , cc->codec ? cc->codec ->name : " unknown" );
412+ printf (" Bit Rate: %ld\n " , cc->bit_rate );
413+ printf (" Time Base: %d/%d\n " , cc->time_base .num , cc->time_base .den );
414+ printf (" GOP Size: %d\n " , cc->gop_size );
415+ printf (" Max B-Frames: %d\n " , cc->max_b_frames );
416+ if (cc->codec_type == AVMEDIA_TYPE_VIDEO) {
417+ printf (" Width: %d\n " , cc->width );
418+ printf (" Height: %d\n " , cc->height );
419+ printf (" Pixel Format: %s\n " , av_get_pix_fmt_name (cc->pix_fmt ));
420+ printf (" Frame Rate: %d/%d\n " , cc->framerate .num , cc->framerate .den );
421+ } else if (cc->codec_type == AVMEDIA_TYPE_AUDIO) {
422+ printf (" Sample Rate: %d\n " , cc->sample_rate );
423+ printf (" Channels: %d\n " , cc->channels );
424+ printf (" Channel Layout: %ld\n " , cc->channel_layout );
425+ printf (" Sample Format: %s\n " , av_get_sample_fmt_name (cc->sample_fmt ));
426+ }
427+ printf (" Profile: %d\n " , cc->profile );
428+ printf (" Level: %d\n " , cc->level );
429+ printf (" Flags: %d\n " , cc->flags );
430+ printf (" Thread Count: %d\n " , cc->thread_count );
431+ // Additional attributes
432+ printf (" Skip Frame: %d\n " , cc->skip_frame );
433+ printf (" Skip IDCT: %d\n " , cc->skip_idct );
434+ printf (" Skip Loop Filter: %d\n " , cc->skip_loop_filter );
435+ printf (" Error Recognition: %d\n " , cc->err_recognition );
436+ printf (" Error Concealment: %d\n " , cc->error_concealment );
437+ printf (" HW Device Context: %p\n " , cc->hw_device_ctx );
438+ printf (" HW Accel: %p\n " , cc->hwaccel );
439+ printf (" Pkt Timebase: %d/%d\n " , cc->pkt_timebase .num , cc->pkt_timebase .den );
440+ printf (" Delay: %d\n " , cc->delay );
441+ printf (" Extradata Size: %d\n " , cc->extradata_size );
442+ if (cc->extradata && cc->extradata_size > 0 ) {
443+ printf (" Extradata: " );
444+ for (int i = 0 ; i < cc->extradata_size ; i++) {
445+ printf (" %02X " , cc->extradata [i]);
446+ }
447+ printf (" \n " );
448+ }
449+ printf (" RC Buffer Size: %d\n " , cc->rc_buffer_size );
450+ printf (" RC Max Rate: %d\n " , cc->rc_max_rate );
451+ printf (" RC Min Rate: %d\n " , cc->rc_min_rate );
452+ printf (" Thread Type: %d\n " , cc->thread_type );
453+ printf (" Ticks Per Frame: %d\n " , cc->ticks_per_frame );
454+ printf (
455+ " Subtitle Char Encoding: %s\n " ,
456+ cc->sub_charenc ? cc->sub_charenc : " N/A" );
457+ printf (" \n " );
458+ }
459+
408460void VideoDecoder::addVideoStreamDecoder (
409461 int preferredStreamIndex,
410462 const VideoStreamOptions& videoStreamOptions) {
@@ -456,6 +508,10 @@ void VideoDecoder::addVideoStreamDecoder(
456508 AVCodecContext* codecContext = avcodec_alloc_context3 (avCodec);
457509 TORCH_CHECK (codecContext != nullptr );
458510 codecContext->thread_count = videoStreamOptions.ffmpegThreadCount .value_or (0 );
511+ if (!codecContext->channel_layout ) {
512+ codecContext->channel_layout =
513+ av_get_default_channel_layout (codecContext->channels );
514+ }
459515 streamInfo.codecContext .reset (codecContext);
460516
461517 int retVal = avcodec_parameters_to_context (
@@ -499,6 +555,8 @@ void VideoDecoder::addVideoStreamDecoder(
499555
500556 streamInfo.colorConversionLibrary =
501557 videoStreamOptions.colorConversionLibrary .value_or (defaultLibrary);
558+
559+ print_codecContext (streamInfo.codecContext .get ());
502560}
503561
504562void VideoDecoder::updateMetadataWithCodecContext (
@@ -926,6 +984,64 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() {
926984// LOW-LEVEL DECODING
927985// --------------------------------------------------------------------------
928986
987+ void print_packet (AVPacket* packet) {
988+ printf (
989+ " Packet PTS: %ld, DTS: %ld, Duration: %d, Size: %d, Stream Index: %d\n " ,
990+ packet->pts ,
991+ packet->dts ,
992+ packet->duration ,
993+ packet->size ,
994+ packet->stream_index );
995+ // Optional: Calculate a simple checksum or hash of the packet data
996+ unsigned long checksum = 0 ;
997+ for (int i = 0 ; i < packet->size ; i++) {
998+ checksum += packet->data [i];
999+ }
1000+ printf (" Packet Checksum: %lu\n\n " , checksum);
1001+ fflush (stdout);
1002+ }
1003+
1004+ void print_avFrame (AVFrame* avFrame) {
1005+ printf (" Format: %d\n " , avFrame->format );
1006+ printf (" Width: %d\n " , avFrame->width );
1007+ printf (" Height: %d\n " , avFrame->height );
1008+ printf (
1009+ " Channels: %d\n " ,
1010+ av_get_channel_layout_nb_channels (avFrame->channel_layout ));
1011+ printf (" Channel Layout: %ld\n " , avFrame->channel_layout );
1012+ printf (" Number of Samples: %d\n " , avFrame->nb_samples );
1013+ printf (" PTS: %ld\n " , avFrame->pts );
1014+ printf (" Packet DTS: %ld\n " , avFrame->pkt_dts );
1015+ printf (" Packet Duration: %d\n " , avFrame->pkt_duration );
1016+ printf (" Packet Pos: %d\n " , avFrame->pkt_pos );
1017+ for (int i = 0 ; i < AV_NUM_DATA_POINTERS; i++) {
1018+ if (avFrame->data [i]) {
1019+ printf (" Data[%d] Line Size: %d\n " , i, avFrame->linesize [i]);
1020+ }
1021+ }
1022+ printf (" Color Range: %d\n " , avFrame->color_range );
1023+ printf (" Color Primaries: %d\n " , avFrame->color_primaries );
1024+ printf (" Color Transfer Characteristic: %d\n " , avFrame->color_trc );
1025+ printf (" Color Space: %d\n " , avFrame->colorspace );
1026+ printf (" Chroma Location: %d\n " , avFrame->chroma_location );
1027+ printf (
1028+ " Sample Aspect Ratio: %d/%d\n " ,
1029+ avFrame->sample_aspect_ratio .num ,
1030+ avFrame->sample_aspect_ratio .den );
1031+ printf (" Key Frame: %d\n " , avFrame->key_frame );
1032+ printf (" Picture Type: %d\n " , avFrame->pict_type );
1033+ printf (" Coded Picture Number: %d\n " , avFrame->coded_picture_number );
1034+ printf (" Display Picture Number: %d\n " , avFrame->display_picture_number );
1035+
1036+ unsigned long checksum = 0 ;
1037+ for (int i = 0 ; i < 100 ; i++) {
1038+ checksum += avFrame->extended_data [0 ][i];
1039+ }
1040+ printf (" Frame Checksum: %lu\n " , checksum);
1041+ printf (" \n " );
1042+ fflush (stdout);
1043+ }
1044+
9291045VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame (
9301046 std::function<bool (AVFrame*)> filterFunction) {
9311047 if (activeStreamIndex_ == NO_ACTIVE_STREAM) {
@@ -942,14 +1058,14 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
9421058
9431059 StreamInfo& streamInfo = streamInfos_[activeStreamIndex_];
9441060
945- // Need to get the next frame or error from PopFrame.
9461061 UniqueAVFrame avFrame (av_frame_alloc ());
9471062 AutoAVPacket autoAVPacket;
9481063 int ffmpegStatus = AVSUCCESS;
9491064 bool reachedEOF = false ;
9501065 while (true ) {
9511066 ffmpegStatus =
9521067 avcodec_receive_frame (streamInfo.codecContext .get (), avFrame.get ());
1068+ // printf("output of avcodec_receive_frame: %d\n", ffmpegStatus);
9531069
9541070 if (ffmpegStatus != AVSUCCESS && ffmpegStatus != AVERROR (EAGAIN)) {
9551071 // Non-retriable error
@@ -960,7 +1076,7 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
9601076 // Is this the kind of frame we're looking for?
9611077 if (ffmpegStatus == AVSUCCESS && filterFunction (avFrame.get ())) {
9621078 // Yes, this is the frame we'll return; break out of the decoding loop.
963- printf (" %ld %ld\n " , avFrame->pts , avFrame->duration );
1079+ // printf("%ld %ld\n", avFrame->pts, avFrame->duration);
9641080
9651081 break ;
9661082 } else if (ffmpegStatus == AVSUCCESS) {
@@ -1015,8 +1131,10 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
10151131
10161132 // We got a valid packet. Send it to the decoder, and we'll receive it in
10171133 // the next iteration.
1134+ print_packet (packet.get ());
10181135 ffmpegStatus =
10191136 avcodec_send_packet (streamInfo.codecContext .get (), packet.get ());
1137+ print_packet (packet.get ());
10201138 if (ffmpegStatus < AVSUCCESS) {
10211139 throw std::runtime_error (
10221140 " Could not push packet to decoder: " +
@@ -1045,6 +1163,7 @@ VideoDecoder::AVFrameStream VideoDecoder::decodeAVFrame(
10451163 // the file and that will flush the decoder.
10461164 streamInfo.currentPts = avFrame->pts ;
10471165 streamInfo.currentDuration = getDuration (avFrame);
1166+ print_avFrame (avFrame.get ());
10481167
10491168 return AVFrameStream (std::move (avFrame), activeStreamIndex_);
10501169}
@@ -1072,27 +1191,22 @@ VideoDecoder::FrameOutput VideoDecoder::convertAVFrameToFrameOutput(
10721191 auto sampleRate = avFrame->sample_rate ;
10731192 auto numChannels = avFrame->ch_layout .nb_channels ;
10741193
1075- // printf("numSamples: %d\n", numSamples);
1076- // printf("sample rate: %d\n", sampleRate);
1194+ // printf("numSamples: %d\n", numSamples);
1195+ // printf("sample rate: %d\n", sampleRate);
10771196
1078- // printf("numChannels: %d\n", numChannels);
1197+ // printf("numChannels: %d\n", numChannels);
10791198 int bytesPerSample =
10801199 av_get_bytes_per_sample (streamInfo.codecContext ->sample_fmt );
1081- // printf("bytes per sample: %d\n", bytesPerSample);
1082-
1083- // Assuming format is FLTP (float 32bits ???)
1200+ // printf("bytes per sample: %d\n", bytesPerSample);
10841201
1085- // This is slow, use accessor. or just memcpy?
10861202 torch::Tensor data = torch::empty ({numChannels, numSamples}, torch::kFloat32 );
10871203 for (auto channel = 0 ; channel < numChannels; ++channel) {
1088- // auto channelDataPtr = data[channel].data_ptr<uint8_t>();
1089- // std::memcpy(channelDataPtr, avFrame->data[channel], numSamples *
1090- // bytesPerSample);
10911204 float * dataFloatPtr = (float *)(avFrame->data [channel]);
10921205 for (auto sampleIndex = 0 ; sampleIndex < numSamples; ++sampleIndex) {
10931206 data[channel][sampleIndex] = dataFloatPtr[sampleIndex];
10941207 }
10951208 }
1209+
10961210 frameOutput.data = data;
10971211 return frameOutput;
10981212
0 commit comments