Skip to content

Commit 622f28d

Browse files
committed
ContainerM4A: more callbacks
1 parent 3752ec5 commit 622f28d

File tree

4 files changed

+145
-95
lines changed

4 files changed

+145
-95
lines changed

src/AudioTools/AudioCodecs/ContainerM4A.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class ContainerM4A : public ContainerDecoder {
7878
// for AAC only: process magic cookie if not done yet
7979
if (StrView(frame.mime) == "audio/aac" &&
8080
!self->is_magic_cookie_processed) {
81-
auto& magic_cookie = self->demux.getAlacMagicCookie();
81+
auto& magic_cookie = self->demux.getALACMagicCookie();
8282
if (magic_cookie.size() > 0) {
8383
dec.setCodecConfig(magic_cookie.data(), magic_cookie.size());
8484
}

src/AudioTools/AudioCodecs/M4AAudioDemuxer.h

Lines changed: 63 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@ class M4AAudioDemuxer {
7575
box_pos += currentSize;
7676
++sampleIndex;
7777
currentSize = currentSampleSize();
78-
if (box_pos >= box_size){
79-
LOGI("Reached end of box: %s write", is_final ? "final" : "not final");
78+
if (box_pos >= box_size) {
79+
LOGI("Reached end of box: %s write",
80+
is_final ? "final" : "not final");
8081
return j;
8182
}
8283
if (currentSize == 0) {
@@ -92,7 +93,7 @@ class M4AAudioDemuxer {
9293
Vector<size_t>& getSampleSizes() { return sampleSizes; }
9394
Vector<size_t>& getChunkOffsets() { return chunkOffsets; }
9495

95-
void setAacConfig(int profile, int srIdx, int chCfg) {
96+
void setAACConfig(int profile, int srIdx, int chCfg) {
9697
aacProfile = profile;
9798
sampleRateIdx = srIdx;
9899
channelCfg = chCfg;
@@ -181,6 +182,10 @@ class M4AAudioDemuxer {
181182
M4AAudioDemuxer() {
182183
parser.setReference(this);
183184
parser.setCallback(boxCallback);
185+
// parsing for content of stsd (Sample Description Box)
186+
parser.setCallback("esds", esdsCallback);
187+
parser.setCallback("mp4a", mp4aCallback);
188+
parser.setCallback("alac", alacCallback);
184189

185190
// incremental data callback
186191
parser.setDataCallback(boxDataCallback);
@@ -205,7 +210,7 @@ class M4AAudioDemuxer {
205210

206211
int availableForWrite() { return parser.availableForWrite(); }
207212

208-
Vector<uint8_t>& getAlacMagicCookie() { return alacMagicCookie; }
213+
Vector<uint8_t>& getALACMagicCookie() { return alacMagicCookie; }
209214

210215
void setReference(void* ref) { this->ref = ref; }
211216

@@ -231,6 +236,21 @@ class M4AAudioDemuxer {
231236
StrView(type) == "stco");
232237
}
233238

239+
/// Parser callback registered for 'mp4a' boxes: forwards the box to onMp4a()
/// of the demuxer instance that was passed as ref.
static void mp4aCallback(MP4Parser::Box& box, void* ref) {
  auto* demuxer = static_cast<M4AAudioDemuxer*>(ref);
  demuxer->onMp4a(box);
}
243+
244+
/// Parser callback registered for 'esds' boxes: forwards the box to onEsds()
/// of the demuxer instance that was passed as ref.
static void esdsCallback(MP4Parser::Box& box, void* ref) {
  auto* demuxer = static_cast<M4AAudioDemuxer*>(ref);
  demuxer->onEsds(box);
}
248+
249+
/// Parser callback registered for 'alac' boxes: forwards the box to OnAlac()
/// of the demuxer instance that was passed as ref.
static void alacCallback(MP4Parser::Box& box, void* ref) {
  auto* demuxer = static_cast<M4AAudioDemuxer*>(ref);
  demuxer->OnAlac(box);
}
253+
234254
/// Just prints the box name and the number of bytes received
235255
static void boxCallback(MP4Parser::Box& box, void* ref) {
236256
M4AAudioDemuxer& self = *static_cast<M4AAudioDemuxer*>(ref);
@@ -314,90 +334,60 @@ class M4AAudioDemuxer {
314334
size_t size = box.data_size;
315335
if (size < 8) return;
316336
uint32_t entryCount = readU32(data + 4);
317-
size_t cursor = 8;
318-
for (uint32_t i = 0; i < entryCount; ++i) {
319-
if (cursor + 8 > size) break;
320-
uint32_t entrySize = readU32(data + cursor);
321-
const char* entryType = (const char*)(data + cursor + 4);
322-
if (entrySize < 36 || cursor + entrySize > size) break;
323-
size_t childrenStart = cursor + 8 + 28;
324-
size_t childrenEnd = cursor + entrySize;
325-
codec = Codec::Unknown;
326-
if (StrView(entryType) == "mp4a") {
327-
LOGI("-> AAC")
328-
codec = Codec::AAC;
329-
sampleExtractor.setCodec(codec);
330-
onStsdHandleMp4a(data, size, childrenStart, childrenEnd);
331-
break;
332-
} else if (StrView(entryType) == ".mp3") {
333-
LOGI("-> MP3")
334-
codec = Codec::MP3;
335-
sampleExtractor.setCodec(codec);
336-
break;
337-
} else if (StrView(entryType) == "alac") {
338-
LOGI("-> ALAC")
339-
codec = Codec::ALAC;
340-
sampleExtractor.setCodec(codec);
341-
onStsdHandleAlac(data, size, childrenStart, childrenEnd);
342-
break;
343-
}
344-
cursor += entrySize;
345-
}
337+
// One or more sample entry boxes (e.g. mp4a, .mp3, alac)
338+
parser.parseString(data + 8, size - 8);
346339
}
347340

348-
void onStsdHandleMp4a(const uint8_t* data, size_t size, size_t childrenStart,
349-
size_t childrenEnd) {
341+
/// Handles an 'mp4a' sample entry: selects AAC as codec, installs the default
/// AAC configuration and then parses the child boxes (normally an 'esds'),
/// whose callback may replace the defaults.
///
/// box.data points behind the 8 byte box header, so the fixed sample entry
/// fields occupy the first 28 payload bytes (36 bytes counted from the start
/// of the box) and the child boxes follow directly after them.
///
/// @param box the mp4a box as reported by the parser
void onMp4a(const MP4Parser::Box& box) {
  LOGI("onMp4a: %s, size: %zu bytes", box.type, box.data_size);
  // fixed sample entry fields that precede the child boxes (36 - 8)
  const size_t kChildStart = 36 - 8;
  // size of a box header (size + type)
  const size_t kBoxHeaderSize = 8;
  if (box.data == nullptr || box.data_size < kChildStart) return;

  // use default configuration
  int aacProfile = 2;     // Default: AAC LC
  int sampleRateIdx = 4;  // Default: 44100 Hz
  int channelCfg = 2;     // Default: Stereo
  sampleExtractor.setAACConfig(aacProfile, sampleRateIdx, channelCfg);
  codec = Codec::AAC;
  sampleExtractor.setCodec(codec);

  // An entry without children is still valid AAC: keep the defaults, but do
  // not hand the parser fewer bytes than a single box header.
  const size_t remaining = box.data_size - kChildStart;
  if (remaining < kBoxHeaderSize) return;

  /// for mp4a we expect to contain a esds
  parser.parseString(box.data + kChildStart, static_cast<int>(remaining));
}
369357

370-
void onStsdParseEsdsForAacConfig(const uint8_t* esds, size_t esdsLen,
371-
int& aacProfile, int& sampleRateIdx,
372-
int& channelCfg) {
373-
for (size_t j = 0; j + 2 < esdsLen; ++j) {
374-
if (esds[j] == 0x05) { // 0x05 = AudioSpecificConfig tag
375-
// Next byte is length, then AudioSpecificConfig
376-
const uint8_t* asc = esds + j + 2;
377-
aacProfile = ((asc[0] >> 3) & 0x1F); // 5 bits
358+
/// Handles an 'esds' box: scans the payload for the descriptor tag 0x05
/// (AudioSpecificConfig), decodes profile, sample rate index and channel
/// configuration from its first two bytes and forwards them to the sample
/// extractor.
///
/// A candidate tag is only accepted when its length field is well formed,
/// announces at least the 2 bytes that are decoded and fits into the box.
/// Rejected candidates are reported and skipped, so bytes that failed the
/// check are never applied as configuration. The descriptor length may use
/// 1 to 4 bytes (7 bits per byte, high bit = another length byte follows).
///
/// Scanning continues after a match, so the last valid candidate wins.
///
/// @param box the esds box as reported by the parser
void onEsds(const MP4Parser::Box& box) {
  LOGI("onEsds: %s, size: %zu bytes", box.type, box.data_size);
  if (box.data == nullptr) return;

  for (size_t i = 2; i + 4 < box.data_size; ++i) {
    if (box.data[i] != 0x05) continue;  // 0x05 = AudioSpecificConfig tag

    // decode the variable length size field that follows the tag
    size_t pos = i + 1;
    size_t asc_len = 0;
    bool is_len_ok = false;
    for (int n = 0; n < 4 && pos < box.data_size; ++n) {
      const uint8_t len_byte = box.data[pos++];
      asc_len = (asc_len << 7) | (len_byte & 0x7F);
      if ((len_byte & 0x80) == 0) {
        is_len_ok = true;
        break;
      }
    }

    // AudioSpecificConfig is at least 2 bytes and must fit into the box
    if (!is_len_ok || asc_len < 2 || pos + asc_len > box.data_size) {
      LOGW("esds box not long enough for AudioSpecificConfig");
      continue;
    }

    const uint8_t* asc = box.data + pos;
    int aacProfile = (asc[0] >> 3) & 0x1F;  // 5 bits
    int sampleRateIdx =
        ((asc[0] & 0x07) << 1) | ((asc[1] >> 7) & 0x01);  // 4 bits
    int channelCfg = (asc[1] >> 3) & 0x0F;                // 4 bits
    LOGI("AudioSpecificConfig: profile=%d, sampleRateIdx=%d, channelCfg=%d",
         aacProfile, sampleRateIdx, channelCfg);
    sampleExtractor.setAACConfig(aacProfile, sampleRateIdx, channelCfg);
  }
}
385383

386-
void onStsdHandleAlac(const uint8_t* data, size_t size, size_t childrenStart,
387-
size_t childrenEnd) {
388-
size_t childOffset = childrenStart;
389-
while (childOffset + 8 <= childrenEnd && childOffset + 8 <= size) {
390-
uint32_t childSize = readU32(data + childOffset);
391-
const char* childType = (const char*)(data + childOffset + 4);
392-
if (childSize < 8 || childOffset + childSize > size) break;
393-
if (StrView(childType) == "alac") {
394-
alacMagicCookie.resize(childSize - 8);
395-
std::memcpy(alacMagicCookie.data(), data + childOffset + 8,
396-
childSize - 8);
397-
break;
398-
}
399-
childOffset += childSize;
400-
}
384+
/// Handles an 'alac' box: selects ALAC as codec and stores the magic cookie
/// that is later returned by getALACMagicCookie().
///
/// The callback is triggered for the 'alac' sample entry of the stsd box. In
/// that case the payload starts with 28 bytes of sample entry fields followed
/// by child boxes, and the cookie is the payload of the nested 'alac' child.
/// When no such child is found the complete payload is stored.
///
/// @param box the alac box as reported by the parser
void OnAlac(const MP4Parser::Box& box) {
  LOGI("onAlac: %s, size: %zu bytes", box.type, box.data_size);
  codec = Codec::ALAC;
  sampleExtractor.setCodec(codec);

  const uint8_t* cookie = box.data;
  size_t cookie_size = (box.data != nullptr) ? box.data_size : 0;

  // look for a nested 'alac' box behind the fixed sample entry fields
  size_t child_pos = 36 - 8;
  while (box.data != nullptr && child_pos + 8 <= box.data_size) {
    const size_t child_size = readU32(box.data + child_pos);
    if (child_size < 8 || child_pos + child_size > box.data_size) break;
    if (std::memcmp(box.data + child_pos + 4, "alac", 4) == 0) {
      cookie = box.data + child_pos + 8;
      cookie_size = child_size - 8;
      break;
    }
    child_pos += child_size;
  }

  alacMagicCookie.resize(cookie_size);
  // never call memcpy with a null pointer or an empty range
  if (cookie_size > 0) {
    std::memcpy(alacMagicCookie.data(), cookie, cookie_size);
  }
}
402392

403393
void onStsz(MP4Parser::Box& box) {

src/AudioTools/AudioCodecs/MP4Parser.h

Lines changed: 74 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@ class MP4Parser {
3434
friend class MP4ParserExt; // Allow MP4Parser to access private members
3535
size_t id = 0;
3636
char type[5]; // 4-character box type
37-
const uint8_t* data; // Pointer to box payload (not including header)
38-
size_t data_size;
39-
size_t size; // Size of payload (not including header)
40-
int level; // Nesting depth
41-
uint64_t offset; // File offset where box starts
37+
const uint8_t* data = nullptr; // Pointer to box payload (not including header)
38+
size_t data_size = 0;
39+
size_t size = 0; // Size of payload (not including header)
40+
int level = 0; // Nesting depth
41+
uint64_t offset = 0; // File offset where box starts
4242
bool is_complete = false;
4343
bool is_container = false;
4444

@@ -107,6 +107,34 @@ class MP4Parser {
107107

108108
int availableForWrite() { return buffer.availableForWrite(); }
109109

110+
/// Adds a box name that will be interpreted as container
111+
void addContainer(const char* name, int start = 0) {
112+
ContainerInfo info;
113+
info.name = name;
114+
info.start = start; // offset of child boxes
115+
}
116+
117+
/// trigger separate parsing (and callbacks) on the indicated string
118+
int parseString(const uint8_t* str, int len) {
119+
char type[5];
120+
int idx = 0;
121+
Box box;
122+
while (true) {
123+
if (!isValidType((const char*)str + idx + 4)) {
124+
return idx;
125+
}
126+
box.data = str + 8 + idx;
127+
box.size = readU32(str + idx);
128+
box.data_size = box.size - 8;
129+
strncpy(box.type, (char*)(str + idx + 4), 4);
130+
box.type[4] = '\0';
131+
idx += box.size;
132+
processCallback(box);
133+
if (idx >= len) break; // No more data to parse
134+
}
135+
return idx;
136+
}
137+
110138
protected:
111139
BoxCallback callback = defaultCallback;
112140
Vector<CallbackEntry> callbacks;
@@ -117,6 +145,11 @@ class MP4Parser {
117145
void* ref = this;
118146
Box box;
119147
bool is_error = false;
148+
/// Entry of the list of box types that are treated as containers
struct ContainerInfo {
  const char* name{nullptr};  ///< box type name
  int start{0};               ///< offset of child boxes
};
152+
Vector<ContainerInfo> containers;
120153

121154
/// Returns the current file offset (absolute position in file)
122155
uint64_t currentFileOffset() { return fileOffset + parseOffset; }
@@ -199,7 +232,7 @@ class MP4Parser {
199232
}
200233

201234
// Regular logic for box with complete data
202-
processCallback();
235+
processCallback(box);
203236

204237
// Recurse into container
205238
if (box.is_container) {
@@ -228,7 +261,7 @@ class MP4Parser {
228261
}
229262
}
230263

231-
void processCallback() {
264+
void processCallback(Box& box) {
232265
bool is_called = false;
233266
for (const auto& entry : callbacks) {
234267
if (strncmp(entry.type, box.type, 4) == 0) {
@@ -240,35 +273,60 @@ class MP4Parser {
240273
if (!is_called) callback(box, ref);
241274
}
242275

243-
bool isContainerBox(const char* type) const {
244-
static const char* containers[] = {
245-
"moov", "trak", "mdia", "minf", "stbl", "edts", "dinf", "udta", "meta",
246-
"ilst", "moof", "traf", "mfra", "tref", "iprp", "sinf", "schi"};
247-
for (const char* c : containers)
248-
if (StrView(type) == c) return true;
276+
bool isContainerBox(const char* type) {
277+
// fill with default values if nothing has been defined
278+
if (containers.empty()) {
279+
static const char* containers_str[] = {
280+
"moov", "trak", "mdia", "minf", "stbl", "edts",
281+
"dinf", "udta", "meta", "ilst", "moof", "traf",
282+
"mfra", "tref", "iprp", "sinf", "schi"};
283+
for (const char* c : containers_str) {
284+
ContainerInfo info;
285+
info.name = c;
286+
info.start = 0;
287+
containers.push_back(info);
288+
}
289+
}
290+
// find the container by name
291+
for (auto& cont : containers) {
292+
if (StrView(type) == cont.name) return true;
293+
}
249294
return false;
250295
}
251296

297+
int getSubcontainerStart(const char* type) {
298+
for (auto& cont : containers) {
299+
if (StrView(type) == cont.name) return cont.start;
300+
}
301+
return 0;
302+
}
303+
252304
bool isPersistedBox(const char* type) const {
253305
static const char* persisted[] = {"stsz"};
254306
for (const char* p : persisted)
255307
if (StrView(type) == p) return true;
256308
return false;
257309
}
258310

311+
/// Checks if the 4 bytes at type + offset look like a box type, i.e. all of
/// them are alphanumeric.
///
/// The bytes are usually part of raw box data and therefore not followed by
/// a terminating zero, so no string length is evaluated: the caller must
/// guarantee that offset + 4 bytes are readable.
///
/// @param type   start of the data to check
/// @param offset position of the candidate type relative to type
/// @return true if all 4 characters are alphanumeric
bool isValidType(const char* type, int offset = 0) const {
  if (type == nullptr) return false;
  for (int j = 0; j < 4; ++j) {
    // isalnum is only defined for unsigned char values (and EOF)
    if (!isalnum(static_cast<unsigned char>(type[offset + j]))) return false;
  }
  return true;
}
317+
259318
size_t checkParseOffset() {
260319
size_t current = parseOffset;
261320
const char* type = (char*)(buffer.data() + parseOffset + 4);
262321
for (int j = 0; j < buffer.available() - parseOffset - 4; j += 4) {
263-
if (isalpha(type[j]) && isalpha(type[j + 1]) && isalpha(type[j + 2]) &&
264-
isalpha(type[j + 3])) {
322+
if (isValidType(type, j)) {
265323
if (j != 0) {
266324
// report the data under the last valid box
267325
box.size = 0;
268326
box.data_size = j;
269327
box.level = static_cast<int>(levelStack.size()) + 1;
270328
box.data = buffer.data() + parseOffset;
271-
processCallback();
329+
processCallback(box);
272330
}
273331

274332
return j + parseOffset;

0 commit comments

Comments
 (0)