@@ -100,6 +100,7 @@ AudioEncoder::AudioEncoder(
100100 // raise. We need to handle this, probably converting the format with
101101 // libswresample.
102102 avCodecContext_->sample_fmt = AV_SAMPLE_FMT_FLTP;
103+ // avCodecContext_->sample_fmt = AV_SAMPLE_FMT_S16;
103104
104105 int numChannels = static_cast <int >(wf_.sizes ()[0 ]);
105106 TORCH_CHECK (
@@ -120,12 +121,6 @@ AudioEncoder::AudioEncoder(
120121 " avcodec_open2 failed: " ,
121122 getFFMPEGErrorStringFromErrorCode (status));
122123
123- TORCH_CHECK (
124- avCodecContext_->frame_size > 0 ,
125- " frame_size is " ,
126- avCodecContext_->frame_size ,
127- " . Cannot encode. This should probably never happen?" );
128-
129124 // We're allocating the stream here. Streams are meant to be freed by
130125 // avformat_free_context(avFormatContext), which we call in the
131126 // avFormatContext_'s destructor.
@@ -143,7 +138,10 @@ AudioEncoder::AudioEncoder(
143138void AudioEncoder::encode () {
144139 UniqueAVFrame avFrame (av_frame_alloc ());
145140 TORCH_CHECK (avFrame != nullptr , " Couldn't allocate AVFrame." );
146- avFrame->nb_samples = avCodecContext_->frame_size ;
141+ // frame_size may not be positive after avcodec_open2; in that case fall back to 256 samples per frame, like torchaudio does
142+ int numSamplesAllocatedPerFrame =
143+ avCodecContext_->frame_size > 0 ? avCodecContext_->frame_size : 256 ;
144+ avFrame->nb_samples = numSamplesAllocatedPerFrame;
147145 avFrame->format = avCodecContext_->sample_fmt ;
148146 avFrame->sample_rate = avCodecContext_->sample_rate ;
149147 avFrame->pts = 0 ;
@@ -160,7 +158,6 @@ void AudioEncoder::encode() {
160158 uint8_t * pwf = static_cast <uint8_t *>(wf_.data_ptr ());
161159 int numSamples = static_cast <int >(wf_.sizes ()[1 ]); // per channel
162160 int numEncodedSamples = 0 ; // per channel
163- int numSamplesPerFrame = avCodecContext_->frame_size ; // per channel
164161 int numBytesPerSample = static_cast <int >(wf_.element_size ());
165162 int numBytesPerChannel = numSamples * numBytesPerSample;
166163
@@ -178,7 +175,7 @@ void AudioEncoder::encode() {
178175 getFFMPEGErrorStringFromErrorCode (status));
179176
180177 int numSamplesToEncode =
181- std::min (numSamplesPerFrame , numSamples - numEncodedSamples);
178+ std::min (numSamplesAllocatedPerFrame , numSamples - numEncodedSamples);
182179 int numBytesToEncode = numSamplesToEncode * numBytesPerSample;
183180
184181 for (int ch = 0 ; ch < wf_.sizes ()[0 ]; ch++) {
0 commit comments