Skip to content

Commit c3e5f08

Browse files
committed
ContainerM4A corrections
1 parent 8702e05 commit c3e5f08

File tree

4 files changed

+95
-50
lines changed

4 files changed

+95
-50
lines changed

src/AudioTools/AudioCodecs/ContainerM4A.h

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,12 @@ namespace audio_tools {
1616
*/
1717
class ContainerM4A : public ContainerDecoder {
1818
public:
19-
ContainerM4A() = default;
19+
ContainerM4A() {
20+
demux.setReference(this);
21+
demux.setCallback(decodeAudio);
22+
};
2023

21-
ContainerM4A(MultiDecoder& decoder) {
24+
ContainerM4A(MultiDecoder& decoder) : ContainerM4A() {
2225
p_decoder = &decoder;
2326
p_decoder->addNotifyAudioChange(*this);
2427
}
@@ -32,7 +35,6 @@ class ContainerM4A : public ContainerDecoder {
3235

3336
bool begin() override {
3437
demux.begin();
35-
demux.setReference(this);
3638
if (p_decoder) p_decoder->begin();
3739
is_active = true;
3840
return true;
@@ -46,6 +48,7 @@ class ContainerM4A : public ContainerDecoder {
4648
}
4749

4850
size_t write(const uint8_t* data, size_t len) override {
51+
if (is_active == false) return len;
4952
demux.write(data, len);
5053
return len;
5154
}
@@ -56,20 +59,25 @@ class ContainerM4A : public ContainerDecoder {
5659
bool is_active = false;
5760
bool is_magic_cookie_processed = false;
5861
MultiDecoder* p_decoder = nullptr;
59-
M4AAudioDemuxer demux{decodeAudio};
62+
M4AAudioDemuxer demux;
6063

6164
static void decodeAudio(const M4AAudioDemuxer::Frame& frame, void* ref) {
6265
ContainerM4A* self = static_cast<ContainerM4A*>(ref);
63-
MultiDecoder& dec = *self->p_decoder;
66+
if (self->p_decoder == nullptr) {
67+
LOGE("No decoder defined, cannot decode audio frame: %s (%u bytes)", frame.mime, (unsigned) frame.size);
68+
return;
69+
}
70+
MultiDecoder& dec = *(self->p_decoder);
6471
// select decoder based on mime type
65-
if (!dec.selectDecoder(frame.mime)){
66-
const char*mime = frame.mime ? frame.mime : "(nullptr)";
72+
if (!dec.selectDecoder(frame.mime)) {
73+
const char* mime = frame.mime ? frame.mime : "(nullptr)";
6774
LOGE("No decoder found for mime type: %s", mime);
6875
return;
6976
}
7077

71-
// process magic cookie if not done yet
72-
if (!self->is_magic_cookie_processed) {
78+
// for AAC only: process magic cookie if not done yet
79+
if (StrView(frame.mime) == "audio/aac" &&
80+
!self->is_magic_cookie_processed) {
7381
auto& magic_cookie = self->demux.getAlacMagicCookie();
7482
if (magic_cookie.size() > 0) {
7583
dec.setCodecConfig(magic_cookie.data(), magic_cookie.size());

src/AudioTools/AudioCodecs/M4AAudioDemuxer.h

Lines changed: 76 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class M4AAudioDemuxer {
1818
*/
1919
struct Frame {
2020
Codec codec;
21-
const char* mime = nullptr;;
21+
const char* mime = nullptr;
2222
const uint8_t* data;
2323
size_t size;
2424
uint64_t timestamp;
@@ -45,16 +45,18 @@ class M4AAudioDemuxer {
4545

4646
void setCodec(M4AAudioDemuxer::Codec c) { codec = c; }
4747

48-
void setCallback(FrameCallback cb) {
49-
callback = cb;
50-
}
48+
void setCallback(FrameCallback cb) { callback = cb; }
5149

5250
void setReference(void* r) { ref = r; }
5351

5452
void write(const uint8_t* data, size_t len, bool is_final) {
5553
// Resize buffer to the current sample size
5654
size_t currentSize = currentSampleSize();
5755
resize(currentSize);
56+
if (currentSize == 0) {
57+
LOGE("No sample size defined, cannot write data");
58+
return;
59+
}
5860

5961
/// fill buffer up to the current sample size
6062
for (int j = 0; j < len; j++) {
@@ -97,16 +99,15 @@ class M4AAudioDemuxer {
9799
frame.timestamp = 0; // TODO: timestamp
98100
switch (codec) {
99101
case Codec::AAC: {
100-
uint8_t adts[7];
101-
writeAdtsHeader(adts, aacProfile, sampleRateIdx, channelCfg,
102-
frameSize);
103102
uint8_t out[frameSize + 7];
104-
memcpy(out, adts, 7);
103+
writeAdtsHeader(out, aacProfile, sampleRateIdx, channelCfg,
104+
frameSize);
105105
memcpy(out + 7, buffer.data(), frameSize);
106106
frame.data = out;
107107
frame.size = sizeof(out);
108108
frame.mime = "audio/aac";
109-
callback(frame, ref);
109+
if (callback) callback(frame, ref);
110+
else LOGE("No callback defined for audio frame extraction");
110111
return;
111112
}
112113
case Codec::ALAC:
@@ -120,6 +121,7 @@ class M4AAudioDemuxer {
120121
break;
121122
}
122123
if (callback) callback(frame, ref);
124+
else LOGE("No callback defined for audio frame extraction");
123125
}
124126

125127
void resize(size_t newSize) {
@@ -151,24 +153,24 @@ class M4AAudioDemuxer {
151153

152154
using FrameCallback = std::function<void(const Frame&, void* ref)>;
153155

154-
M4AAudioDemuxer(FrameCallback cb) : callback(cb) {
156+
M4AAudioDemuxer() {
155157
parser.setReference(this);
156158
parser.setCallback(boxCallback);
157159

158-
sampleExtractor.setReference(this);
159-
sampleExtractor.setCallback(callback);
160-
161160
// incremental data callback
162161
parser.setDataCallback(boxDataCallback);
163162

164-
// Add more as needed...
165-
// parser.begin();
163+
}
164+
165+
void setCallback(FrameCallback cb) {
166+
sampleExtractor.setReference(ref);
167+
sampleExtractor.setCallback(cb);
166168
}
167169

168170
void begin() {
169171
codec = Codec::Unknown;
170172
alacMagicCookie.clear();
171-
resize(1024);
173+
resize(default_size);
172174

173175
// When codec/sampleSizes/callback/ref change, update the extractor:
174176
parser.begin();
@@ -181,24 +183,23 @@ class M4AAudioDemuxer {
181183

182184
Vector<uint8_t>& getAlacMagicCookie() { return alacMagicCookie; }
183185

184-
void setReference(void* ref) {
185-
this->ref = ref;
186-
}
186+
void setReference(void* ref) { this->ref = ref; }
187187

188188
void resize(int size) {
189+
default_size = size;
189190
if (buffer.size() < size) {
190191
buffer.resize(size);
191192
}
192193
}
193194

194195
protected:
195-
FrameCallback callback;
196196
MP4ParserIncremental parser;
197197
Codec codec = Codec::Unknown;
198198
Vector<uint8_t> alacMagicCookie;
199199
SingleBuffer<uint8_t> buffer; // buffer to collect incremental data
200200
SampleExtractor sampleExtractor;
201201
void* ref = nullptr;
202+
size_t default_size = 1024;
202203

203204
bool isRelevantBox(const char* type) {
204205
// Check if the box is relevant for audio demuxing
@@ -208,10 +209,17 @@ class M4AAudioDemuxer {
208209
/// Just prints the box name and the number of bytes received
209210
static void boxCallback(MP4Parser::Box& box, void* ref) {
210211
M4AAudioDemuxer& self = *static_cast<M4AAudioDemuxer*>(ref);
211-
if (self.isRelevantBox(box.type)) {
212-
self.resize(box.size);
213-
self.buffer.clear();
214-
if (box.data_size > 0) self.buffer.writeArray(box.data, box.data_size);
212+
bool is_relevant = self.isRelevantBox(box.type);
213+
if (is_relevant) {
214+
LOGI("Box: %s, size: %u bytes", box.type, (unsigned) box.size);
215+
if (box.data_size == 0) {
216+
// setup for incremental processing
217+
self.resize(box.size);
218+
self.buffer.clear();
219+
} else {
220+
// we have the complete box data
221+
self.processBox(box);
222+
}
215223
}
216224
}
217225

@@ -221,31 +229,44 @@ class M4AAudioDemuxer {
221229

222230
// mdat must not be buffered
223231
if (StrView(box.type) == "mdat") {
224-
self.sampleExtractor.setCodec(self.codec);
232+
LOGI("*Box: %s, size: %u bytes", box.type, (unsigned) len);
233+
//self.sampleExtractor.setCodec(self.codec);
225234
self.sampleExtractor.write(data, len, is_final);
226235
return;
227236
}
228237

229238
// only process relevant boxes
230-
if (!self.isRelevantBox(box.type) == false) return;
239+
if (!self.isRelevantBox(box.type)) return;
240+
241+
LOGI("*Box: %s, size: %u bytes", box.type, (unsigned) len);
231242

232243
// others fill buffer incrementally
233244
if (len > 0) {
234-
self.buffer.writeArray(data, len);
245+
size_t written = self.buffer.writeArray(data, len);
246+
if (written != len) {
247+
LOGE("Failed to write all data to buffer, written: %zu, expected: %zu",
248+
written, len);
249+
}
235250
}
236251

237252
// on last chunk, call the specific box handler
238253
if (is_final) {
239254
MP4Parser::Box complete_box = box;
240255
complete_box.data = self.buffer.data();
241256
complete_box.data_size = self.buffer.size();
242-
if (StrView(box.type) == "stsd") {
243-
self.onStsd(complete_box);
244-
} else if (StrView(box.type) == "stsz") {
245-
self.onStsz(complete_box);
246-
} else if (StrView(box.type) == "stco") {
247-
self.onStco(complete_box);
248-
}
257+
self.processBox(complete_box);
258+
// The buffer might be quite large, so we resize it to the default size
259+
self.resize(self.default_size);
260+
}
261+
}
262+
263+
void processBox(MP4Parser::Box& box) {
264+
if (StrView(box.type) == "stsd") {
265+
onStsd(box);
266+
} else if (StrView(box.type) == "stsz") {
267+
onStsz(box);
268+
} else if (StrView(box.type) == "stco") {
269+
onStco(box);
249270
}
250271
}
251272

@@ -254,7 +275,8 @@ class M4AAudioDemuxer {
254275
}
255276

256277
void onStsd(const MP4Parser::Box& box) {
257-
const uint8_t* data = box.data;
278+
LOGI("onStsd: %s, size: %zu bytes", box.type, box.data_size);
279+
const uint8_t* data = box.data; // skip version/flags ?
258280
size_t size = box.data_size;
259281
if (size < 8) return;
260282
uint32_t entryCount = readU32(data + 4);
@@ -268,13 +290,22 @@ class M4AAudioDemuxer {
268290
size_t childrenEnd = cursor + entrySize;
269291
codec = Codec::Unknown;
270292
if (StrView(entryType) == "mp4a") {
293+
LOGI("-> AAC")
271294
codec = Codec::AAC;
295+
sampleExtractor.setCodec(codec);
272296
onStsdHandleMp4a(data, size, childrenStart, childrenEnd);
297+
break;
273298
} else if (StrView(entryType) == ".mp3") {
299+
LOGI("-> MP3")
274300
codec = Codec::MP3;
301+
sampleExtractor.setCodec(codec);
302+
break;
275303
} else if (StrView(entryType) == "alac") {
304+
LOGI("-> ALAC")
276305
codec = Codec::ALAC;
306+
sampleExtractor.setCodec(codec);
277307
onStsdHandleAlac(data, size, childrenStart, childrenEnd);
308+
break;
278309
}
279310
cursor += entrySize;
280311
}
@@ -336,8 +367,9 @@ class M4AAudioDemuxer {
336367
}
337368

338369
void onStsz(MP4Parser::Box& box) {
370+
LOGI("onStsz: %s, size: %zu bytes", box.type, box.data_size);
339371
// Parse stsz box and fill sampleSizes
340-
const uint8_t* data = box.data;
372+
const uint8_t* data = box.data + 4; // skip version/flags
341373
size_t size = box.data_size;
342374
if (size < 12) return;
343375
uint32_t sampleSize = readU32(data);
@@ -346,25 +378,29 @@ class M4AAudioDemuxer {
346378
Vector<size_t>& sampleSizes = sampleExtractor.getSampleSizes();
347379
if (sampleSize == 0) {
348380
if (size < 12 + 4 * sampleCount) return;
381+
LOGI("-> Sample Sizes Count: %u", sampleCount);
382+
sampleSizes.resize(sampleCount);
349383
for (uint32_t i = 0; i < sampleCount; ++i) {
350-
sampleSizes.push_back(readU32(data + 12 + i * 4));
384+
sampleSizes[i] = readU32(data + 12 + i * 4);
351385
}
352386
} else {
353387
sampleSizes.assign(sampleCount, sampleSize);
354388
}
355389
}
356390

357391
void onStco(MP4Parser::Box& box) {
392+
LOGI("onStco: %s, size: %zu bytes", box.type, box.data_size);
358393
// Parse stco box and fill chunkOffsets
359-
const uint8_t* data = box.data;
394+
const uint8_t* data = box.data + 4; // skip version/flags
360395
size_t size = box.data_size;
361396
if (size < 4) return;
362397
uint32_t entryCount = readU32(data);
363398
Vector<size_t>& chunkOffsets = sampleExtractor.getChunkOffsets();
364-
chunkOffsets.clear();
365399
if (size < 4 + 4 * entryCount) return;
400+
chunkOffsets.resize(entryCount);
401+
LOGI("-> Chunk offsets count: %u", entryCount);
366402
for (uint32_t i = 0; i < entryCount; ++i) {
367-
chunkOffsets.push_back(readU32(data + 4 + i * 4));
403+
chunkOffsets[i] = readU32(data + 4 + i * 4);
368404
}
369405
}
370406
};

src/AudioTools/CoreAudio/AudioLoggerSTD.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class AudioLogger {
6363
void println() {
6464
#if defined(IS_DESKTOP) || defined(IS_DESKTOP_WITH_TIME_ONLY)
6565
fprintf(stderr, "%s\n", print_buffer);
66+
fflush(stderr);
6667
#else
6768
log_print_ptr->println(print_buffer);
6869
log_print_ptr->flush();

tests-cmake/codec/container-m4a/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ target_compile_options(m4a PRIVATE -Wno-multichar)
4242

4343
# set compile options
4444
target_compile_options(arduino-audio-tools INTERFACE -Wno-inconsistent-missing-override)
45-
target_compile_definitions(arduino-audio-tools INTERFACE -DUSE_ALLOCATOR)
45+
#target_compile_definitions(arduino-audio-tools INTERFACE -DUSE_ALLOCATOR)
4646

4747
# specify libraries
4848
target_link_libraries(m4a PRIVATE

0 commit comments

Comments
 (0)