Skip to content

Commit ce1f14a

Browse files
committed
MP3HeaderParser
1 parent cb2f082 commit ce1f14a

File tree

1 file changed

+394
-0
lines changed

1 file changed

+394
-0
lines changed
Lines changed: 394 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,394 @@
1+
#pragma once
2+
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
3+
4+
namespace audio_tools {
5+
6+
/**
7+
* @brief MP3 header parser to check if the data is a valid mp3 and
8+
* to extract some relevant audio information.
9+
* @ingroup codecs
10+
* @ingroup decoder
11+
* @author Phil Schatzmann
12+
* @copyright GPLv3
13+
*/
14+
15+
class MP3HeaderParser {
16+
// MPEG audio frame header
17+
// variables are declared in their serialized order
18+
// includes crc value
19+
struct __attribute__((packed)) FrameHeader {
20+
static const unsigned int SERIALIZED_SIZE = 4;
21+
22+
// bitmasks for frame header fields grouped by byte
23+
static const unsigned char FRAMESYNC_FIRST_BYTEMASK = 0b11111111;
24+
25+
static const unsigned char FRAMESYNC_SECOND_BYTEMASK = 0b1110000;
26+
static const unsigned char AUDIO_VERSION_MASK = 0b00011000;
27+
static const unsigned char LAYER_DESCRIPTION_MASK = 0b00000110;
28+
static const unsigned char PROTECTION_BIT_MASK = 0b00000001;
29+
30+
static const unsigned char BITRATE_INDEX_MASK = 0b11110000;
31+
static const unsigned char SAMPLERATE_INDEX_MASK = 0b00001100;
32+
static const unsigned char PADDING_BIT_MASK = 0b00000010;
33+
static const unsigned char PRIVATE_BIT_MASK = 0b00000001;
34+
35+
static const unsigned char CHANNEL_MODE_MASK = 0b11000000;
36+
static const unsigned char MODE_EXTENTION_MASK = 0b00110000;
37+
static const unsigned char COPYRIGHT_BIT_MASK = 0b00001000;
38+
static const unsigned char ORIGINAL_BIT_MASK = 0b00000100;
39+
static const unsigned char EMPHASIS_MASK = 0b00000011;
40+
41+
char FrameSyncByte;
42+
bool FrameSyncBits : 3;
43+
44+
// indicates MPEG standard version
45+
enum class AudioVersionID : unsigned {
46+
MPEG_2_5 = 0b00,
47+
INVALID = 0b01, // reserved
48+
MPEG_2 = 0b10,
49+
MPEG_1 = 0b11,
50+
} AudioVersion : 2;
51+
52+
// indicates which audio layer of the MPEG standard
53+
enum class LayerID : unsigned {
54+
INVALID = 0b00, // reserved
55+
LAYER_3 = 0b01,
56+
LAYER_2 = 0b10,
57+
LAYER_1 = 0b11,
58+
} Layer : 2;
59+
60+
// indicates whether theres a 16 bit crc checksum following the header
61+
bool Protection : 1;
62+
63+
// sample & bitrate indexes meaning differ depending on MPEG version
64+
// use getBitrate() and GetSamplerate()
65+
bool BitrateIndex : 4;
66+
bool SampleRateIndex : 2;
67+
68+
// indicates whether the audio data is padded with 1 extra byte (slot)
69+
bool Padding : 1;
70+
71+
// this is only informative
72+
bool Private : 1;
73+
74+
// indicates channel mode
75+
enum class ChannelModeID : unsigned {
76+
STEREO = 0b00,
77+
JOINT = 0b01, // joint stereo
78+
DUAL = 0b10, // dual channel (2 mono channels)
79+
SINGLE = 0b11, // single channel (mono)
80+
} ChannelMode : 2;
81+
82+
// Only used in joint channel mode. Meaning differ depending on audio layer
83+
// Use GetExtentionMode()
84+
bool ExtentionMode : 2;
85+
86+
// indicates whether the audio is copyrighted
87+
bool Copyright : 1;
88+
89+
// indicates whether the frame is located on the original media or a copy
90+
bool Original : 1;
91+
92+
uint16_t crc; // crc data if Protection is true
93+
94+
// indicates to the decoder that the file must be de-emphasized, ie the
95+
// decoder must 're-equalize' the sound after a Dolby-like noise supression.
96+
// It is rarely used.
97+
enum class EmphasisID : unsigned {
98+
NONE = 0b00,
99+
MS_50_15 = 0b01,
100+
INVALID = 0b10,
101+
CCIT_J17 = 0b10,
102+
} Emphasis : 2;
103+
104+
enum SpecialBitrate {
105+
INVALID = -8000,
106+
ANY = 0,
107+
};
108+
109+
signed int getBitrate() const {
110+
// version, layer, bit index
111+
static signed char rateTable[4][4][16] = {
112+
// version[00] = MPEG_2_5
113+
{
114+
// layer[00] = INVALID
115+
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
116+
// layer[01] = LAYER_3
117+
{0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
118+
// layer[10] = LAYER_2
119+
{0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
120+
// layer[11] = LAYER_1
121+
{0, 4, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, -1},
122+
},
123+
124+
// version[01] = INVALID
125+
{
126+
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
127+
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
128+
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
129+
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
130+
},
131+
132+
// version[10] = MPEG_2
133+
{
134+
// layer[00] = INVALID
135+
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
136+
// layer[01] = LAYER_3
137+
{0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
138+
// layer[10] = LAYER_2
139+
{0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
140+
// layer[11] = LAYER_1
141+
{0, 4, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, -1},
142+
},
143+
144+
// version[11] = MPEG_1
145+
{
146+
// layer[00] = INVALID
147+
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
148+
// layer[01] = LAYER_3
149+
{0, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, -1},
150+
// layer[10] = LAYER_2
151+
{0, 4, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, -1},
152+
// layer[11] = LAYER_1
153+
{0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, -1},
154+
},
155+
};
156+
157+
return rateTable[AudioVersion][Layer][BitrateIndex] * 8000;
158+
}
159+
160+
enum SpecialSampleRate {
161+
RESERVED = 0,
162+
};
163+
164+
unsigned short getSampleRate() const {
165+
// version, sample rate index
166+
static unsigned short rateTable[4][4] = {
167+
// version[00] = MPEG_2_5
168+
{11025, 12000, 8000, 0},
169+
// version[01] = INVALID
170+
{0, 0, 0, 0},
171+
// version[10] = MPEG_2
172+
{22050, 24000, 16000, 0},
173+
// version[11] = MPEG_1
174+
{44100, 48000, 32000, 0},
175+
};
176+
177+
return rateTable[AudioVersion][SampleRateIndex];
178+
}
179+
180+
int getFrameLength() {
181+
return int((144 * getBitrate() / getSampleRate()) + header.Padding);
182+
}
183+
184+
};
185+
186+
public:
187+
/// parses the header string and returns true if this is a valid mp3 file
188+
bool isValid(uint8_t* data, int len) {
189+
memset(&header, 0, sizeof(header));
190+
StrView str((char*)data, len);
191+
192+
if (str.startsWith("ID3")) {
193+
return true;
194+
}
195+
196+
int pos = seekFrameSync(str);
197+
if (pos == -1) {
198+
LOGE("Could not find FrameSync");
199+
return false;
200+
}
201+
202+
// xing header
203+
if (pos > 0 && str.contains("Xing")) {
204+
return true;
205+
}
206+
207+
// xing header
208+
if (pos > 0 && str.contains("Info")) {
209+
return true;
210+
}
211+
212+
int len_available = len - pos;
213+
if (len_available < sizeof(header)) {
214+
LOGE("Not enough data to determine mp3 header");
215+
return false;
216+
}
217+
218+
// fill header with data
219+
StrView header_str((char*)data + pos, len_available);
220+
header = readFrameHeader(header_str);
221+
222+
// check end of frame: it must contains a synch word
223+
int pos = findSynchWord(header_str.c_str(), header_str.length());
224+
int pos_expected = getFrameLength();
225+
if (pos_expected < header_str.length()){
226+
if (pos != pos_expected){
227+
LOGE("Expected SynchWord missing");
228+
return false;
229+
}
230+
}
231+
232+
// calculate crc
233+
uint16_t crc = crc16((uint8_t*)header_str.c_str(),
234+
sizeof(FrameHeader) - sizeof(uint16_t));
235+
// validate
236+
return FrameReason::VALID == validateFrameHeader(header, crc);
237+
}
238+
239+
uint16_t getSampleRate() const { return header.getSampleRate(); }
240+
241+
int getBitrate() const { return header.getBitrate(); }
242+
243+
/// Determines the frame length
244+
int getFrameLength() {
245+
return header.getFrameLength();
246+
}
247+
248+
const char* getVersionStr() const {
249+
return header.AudioVersion == FrameHeader::AudioVersionID::MPEG_1 ? "1"
250+
: header.AudioVersion == FrameHeader::AudioVersionID::MPEG_2 ? "2"
251+
: header.AudioVersion == FrameHeader::AudioVersionID::MPEG_2_5
252+
? "2.5"
253+
: "INVALID";
254+
}
255+
256+
const char* getLayerStr() const {
257+
return header.Layer == FrameHeader::LayerID::LAYER_1 ? "1"
258+
: header.Layer == FrameHeader::LayerID::LAYER_2 ? "2"
259+
: header.Layer == FrameHeader::LayerID::LAYER_3 ? "3"
260+
: "INVALID";
261+
}
262+
263+
// provides the parsed MP3 frame header
264+
FrameHeader getFrameHeader() { return header; }
265+
266+
/// Finds the mp3/aac sync word
267+
int findSyncWord(uint8_t* buf, int nBytes, uint8_t SYNCWORDH = 0xFF,
268+
uint8_t SYNCWORDL = 0xF0) {
269+
for (int i = 0; i < nBytes - 1; i++) {
270+
if ((buf[i + 0] & SYNCWORDH) == SYNCWORDH &&
271+
(buf[i + 1] & SYNCWORDL) == SYNCWORDL)
272+
return i;
273+
}
274+
return -1;
275+
}
276+
277+
protected:
278+
FrameHeader header;
279+
280+
uint16_t crc16(const uint8_t* data_p, size_t length) {
281+
uint8_t x;
282+
uint16_t crc = 0xFFFF;
283+
284+
while (length--) {
285+
x = crc >> 8 ^ *data_p++;
286+
x ^= x >> 4;
287+
crc = (crc << 8) ^ ((unsigned short)(x << 12)) ^
288+
((unsigned short)(x << 5)) ^ ((unsigned short)x);
289+
}
290+
return crc;
291+
}
292+
293+
// Seeks to the byte at the end of the next continuous run of 11 set bits.
294+
//(ie. after seeking the cursor will be on the byte of which its 3 most
295+
// significant bits are part of the frame sync)
296+
int seekFrameSync(StrView str) {
297+
char cur;
298+
for (int j = 0; j < str.length() - 1; j++) {
299+
cur = str[j];
300+
// read bytes until EOF or a byte with all bits set is encountered
301+
if ((cur & 0b11111111) != 0b11111111) continue;
302+
303+
// peek next byte, ensure its not past EOF, and check that its 3 most
304+
// significant bits are set to complete the continuous run of 11
305+
char next = str[j + 1];
306+
307+
if ((next & 0b11100000) != 0b11100000) {
308+
// if the next byte does not have its 3 most significant bits set it is
309+
// not the end of the framesync and it also cannot be the start of a
310+
// framesync so just skip over it here without the check
311+
continue;
312+
}
313+
return j;
314+
}
315+
316+
return -1;
317+
}
318+
319+
FrameHeader readFrameHeader(StrView in) {
320+
FrameHeader header;
321+
memcpy(&header, in.c_str(), sizeof(header));
322+
return header;
323+
}
324+
325+
enum class FrameReason {
326+
VALID,
327+
INVALID_BITRATE_FOR_VERSION,
328+
INVALID_SAMPLERATE_FOR_VERSION,
329+
INVALID_MPEG_VERSION,
330+
INVALID_LAYER,
331+
INVALID_LAYER_II_BITRATE_AND_MODE,
332+
INVALID_EMPHASIS,
333+
INVALID_CRC,
334+
};
335+
336+
FrameReason validateFrameHeader(const FrameHeader& header, uint16_t crc) {
337+
if (header.Protection) {
338+
if (header.crc != crc) {
339+
LOGI("invalid CRC");
340+
return FrameReason::INVALID_CRC;
341+
}
342+
}
343+
344+
if (header.AudioVersion == FrameHeader::AudioVersionID::INVALID) {
345+
LOGI("invalid mpeg version");
346+
return FrameReason::INVALID_MPEG_VERSION;
347+
}
348+
349+
if (header.Layer == FrameHeader::LayerID::INVALID) {
350+
LOGI("invalid layer");
351+
return FrameReason::INVALID_LAYER;
352+
}
353+
354+
if (header.getBitrate() == FrameHeader::SpecialBitrate::INVALID) {
355+
LOGI("invalid bitrate");
356+
return FrameReason::INVALID_BITRATE_FOR_VERSION;
357+
}
358+
359+
if (header.getSampleRate() == FrameHeader::SpecialSampleRate::RESERVED) {
360+
LOGI("invalid samplerate");
361+
return FrameReason::INVALID_SAMPLERATE_FOR_VERSION;
362+
}
363+
364+
// For Layer II there are some combinations of bitrate and mode which are
365+
// not allowed
366+
if (header.Layer == FrameHeader::LayerID::LAYER_2) {
367+
if (header.ChannelMode == FrameHeader::ChannelModeID::SINGLE) {
368+
if (header.getBitrate() >= 224000) {
369+
LOGI("invalid bitrate >224000");
370+
return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
371+
}
372+
} else {
373+
if (header.getBitrate() >= 32000 && header.getBitrate() <= 56000) {
374+
LOGI("invalid bitrate >32000");
375+
return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
376+
}
377+
378+
if (header.getBitrate() == 80000) {
379+
LOGI("invalid bitrate >80000");
380+
return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
381+
}
382+
}
383+
}
384+
385+
if (header.Emphasis == FrameHeader::EmphasisID::INVALID) {
386+
LOGI("invalid Emphasis");
387+
return FrameReason::INVALID_EMPHASIS;
388+
}
389+
390+
return FrameReason::VALID;
391+
}
392+
};
393+
394+
} // namespace audio_tools

0 commit comments

Comments
 (0)