1
+ #pragma once
2
+ #include " AudioTools/CoreAudio/AudioBasic/StrView.h"
3
+
4
+ namespace audio_tools {
5
+
6
/**
 * @brief MP3 header parser to check if the data is a valid mp3 and
 * to extract some relevant audio information.
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
14
+
15
+ class MP3HeaderParser {
16
/// MPEG audio frame header: the 4 serialized header bytes followed by the
/// optional 16 bit CRC. The members are declared in their serialized order
/// (most significant field of each byte first).
/// NOTE(review): how bit-fields are allocated inside a byte is defined by the
/// compiler, so the raw bytes should be decoded with shifts/masks instead of
/// copying them over this struct.
struct __attribute__((packed)) FrameHeader {
  /// Number of serialized header bytes (without the CRC)
  static const unsigned int SERIALIZED_SIZE = 4;

  // bitmasks for frame header fields grouped by byte
  static const unsigned char FRAMESYNC_FIRST_BYTEMASK = 0b11111111;

  // the 3 most significant bits of the second byte complete the frame sync
  static const unsigned char FRAMESYNC_SECOND_BYTEMASK = 0b11100000;
  static const unsigned char AUDIO_VERSION_MASK = 0b00011000;
  static const unsigned char LAYER_DESCRIPTION_MASK = 0b00000110;
  static const unsigned char PROTECTION_BIT_MASK = 0b00000001;

  static const unsigned char BITRATE_INDEX_MASK = 0b11110000;
  static const unsigned char SAMPLERATE_INDEX_MASK = 0b00001100;
  static const unsigned char PADDING_BIT_MASK = 0b00000010;
  static const unsigned char PRIVATE_BIT_MASK = 0b00000001;

  static const unsigned char CHANNEL_MODE_MASK = 0b11000000;
  static const unsigned char MODE_EXTENTION_MASK = 0b00110000;
  static const unsigned char COPYRIGHT_BIT_MASK = 0b00001000;
  static const unsigned char ORIGINAL_BIT_MASK = 0b00000100;
  static const unsigned char EMPHASIS_MASK = 0b00000011;

  char FrameSyncByte;
  // multi bit fields must not be bool: a bool bit-field only stores 0 or 1
  unsigned FrameSyncBits : 3;

  // indicates MPEG standard version
  enum class AudioVersionID : unsigned {
    MPEG_2_5 = 0b00,
    INVALID = 0b01,  // reserved
    MPEG_2 = 0b10,
    MPEG_1 = 0b11,
  } AudioVersion : 2;

  // indicates which audio layer of the MPEG standard
  enum class LayerID : unsigned {
    INVALID = 0b00,  // reserved
    LAYER_3 = 0b01,
    LAYER_2 = 0b10,
    LAYER_1 = 0b11,
  } Layer : 2;

  // protection bit as stored in the stream: per the MPEG audio frame header
  // layout 0 means that a 16 bit crc follows the header, 1 means no crc
  bool Protection : 1;

  // sample & bitrate indexes meaning differ depending on MPEG version
  // use getBitrate() and getSampleRate()
  unsigned BitrateIndex : 4;
  unsigned SampleRateIndex : 2;

  // indicates whether the audio data is padded with 1 extra byte (slot)
  bool Padding : 1;

  // this is only informative
  bool Private : 1;

  // indicates channel mode
  enum class ChannelModeID : unsigned {
    STEREO = 0b00,
    JOINT = 0b01,   // joint stereo
    DUAL = 0b10,    // dual channel (2 mono channels)
    SINGLE = 0b11,  // single channel (mono)
  } ChannelMode : 2;

  // Only used in joint channel mode. Meaning differ depending on audio layer
  unsigned ExtentionMode : 2;

  // indicates whether the audio is copyrighted
  bool Copyright : 1;

  // indicates whether the frame is located on the original media or a copy
  bool Original : 1;

  // indicates to the decoder that the file must be de-emphasized, ie the
  // decoder must 're-equalize' the sound after a Dolby-like noise supression.
  // It is rarely used.
  enum class EmphasisID : unsigned {
    NONE = 0b00,
    MS_50_15 = 0b01,
    INVALID = 0b10,  // reserved
    CCIT_J17 = 0b11,
  } Emphasis : 2;

  // crc data: it follows the 4 header bytes in the stream, so it must be
  // declared after the last header bit-field
  uint16_t crc;

  enum SpecialBitrate {
    INVALID = -8000,  // result of getBitrate() for a table entry of -1
    ANY = 0,          // bitrate index 0
  };

  /// Provides the bitrate in bits per second, SpecialBitrate::INVALID for
  /// reserved combinations and SpecialBitrate::ANY for bitrate index 0.
  signed int getBitrate() const {
    // version, layer, bit index: the entries are stored in units of 8000 bps
    static const signed char rateTable[4][4][16] = {
        // version[00] = MPEG_2_5
        {
            // layer[00] = INVALID
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
            // layer[01] = LAYER_3
            {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
            // layer[10] = LAYER_2
            {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
            // layer[11] = LAYER_1
            {0, 4, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, -1},
        },

        // version[01] = INVALID
        {
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
        },

        // version[10] = MPEG_2
        {
            // layer[00] = INVALID
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
            // layer[01] = LAYER_3
            {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
            // layer[10] = LAYER_2
            {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
            // layer[11] = LAYER_1
            {0, 4, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, -1},
        },

        // version[11] = MPEG_1
        {
            // layer[00] = INVALID
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
            // layer[01] = LAYER_3
            {0, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, -1},
            // layer[10] = LAYER_2
            {0, 4, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, -1},
            // layer[11] = LAYER_1
            {0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, -1},
        },
    };

    // scoped enums do not convert implicitly to an array index
    const unsigned version = static_cast<unsigned>(AudioVersion);
    const unsigned layer = static_cast<unsigned>(Layer);
    return rateTable[version][layer][BitrateIndex] * 8000;
  }

  enum SpecialSampleRate {
    RESERVED = 0,
  };

  /// Provides the sample rate in Hz or SpecialSampleRate::RESERVED (0) for
  /// reserved version / index combinations.
  unsigned short getSampleRate() const {
    // version, sample rate index
    static const unsigned short rateTable[4][4] = {
        // version[00] = MPEG_2_5
        {11025, 12000, 8000, 0},
        // version[01] = INVALID
        {0, 0, 0, 0},
        // version[10] = MPEG_2
        {22050, 24000, 16000, 0},
        // version[11] = MPEG_1
        {44100, 48000, 32000, 0},
    };

    const unsigned version = static_cast<unsigned>(AudioVersion);
    return rateTable[version][SampleRateIndex];
  }

  /// Provides the length of the frame in bytes (including the header) or 0
  /// when it can not be determined (invalid or free bitrate, reserved sample
  /// rate).
  int getFrameLength() const {
    const long bitrate = getBitrate();
    const long sample_rate = getSampleRate();
    // avoid a division by zero and negative lengths for unusable headers
    if (bitrate <= 0 || sample_rate == 0) return 0;

    const int padding = Padding ? 1 : 0;

    // Layer I uses slots of 4 bytes
    if (Layer == LayerID::LAYER_1) {
      return static_cast<int>((12 * bitrate / sample_rate + padding) * 4);
    }

    // Layer III of MPEG 2 / 2.5 only has 576 samples per frame
    const bool is_half_size =
        Layer == LayerID::LAYER_3 && AudioVersion != AudioVersionID::MPEG_1;
    const long coefficient = is_half_size ? 72 : 144;
    return static_cast<int>(coefficient * bitrate / sample_rate + padding);
  }
};
185
+
186
+ public:
187
+ // / parses the header string and returns true if this is a valid mp3 file
188
+ bool isValid (uint8_t * data, int len) {
189
+ memset (&header, 0 , sizeof (header));
190
+ StrView str ((char *)data, len);
191
+
192
+ if (str.startsWith (" ID3" )) {
193
+ return true ;
194
+ }
195
+
196
+ int pos = seekFrameSync (str);
197
+ if (pos == -1 ) {
198
+ LOGE (" Could not find FrameSync" );
199
+ return false ;
200
+ }
201
+
202
+ // xing header
203
+ if (pos > 0 && str.contains (" Xing" )) {
204
+ return true ;
205
+ }
206
+
207
+ // xing header
208
+ if (pos > 0 && str.contains (" Info" )) {
209
+ return true ;
210
+ }
211
+
212
+ int len_available = len - pos;
213
+ if (len_available < sizeof (header)) {
214
+ LOGE (" Not enough data to determine mp3 header" );
215
+ return false ;
216
+ }
217
+
218
+ // fill header with data
219
+ StrView header_str ((char *)data + pos, len_available);
220
+ header = readFrameHeader (header_str);
221
+
222
+ // check end of frame: it must contains a synch word
223
+ int pos = findSynchWord (header_str.c_str (), header_str.length ());
224
+ int pos_expected = getFrameLength ();
225
+ if (pos_expected < header_str.length ()){
226
+ if (pos != pos_expected){
227
+ LOGE (" Expected SynchWord missing" );
228
+ return false ;
229
+ }
230
+ }
231
+
232
+ // calculate crc
233
+ uint16_t crc = crc16 ((uint8_t *)header_str.c_str (),
234
+ sizeof (FrameHeader) - sizeof (uint16_t ));
235
+ // validate
236
+ return FrameReason::VALID == validateFrameHeader (header, crc);
237
+ }
238
+
239
+ uint16_t getSampleRate () const { return header.getSampleRate (); }
240
+
241
+ int getBitrate () const { return header.getBitrate (); }
242
+
243
+ // / Determines the frame length
244
+ int getFrameLength () {
245
+ return header.getFrameLength ();
246
+ }
247
+
248
+ const char * getVersionStr () const {
249
+ return header.AudioVersion == FrameHeader::AudioVersionID::MPEG_1 ? " 1"
250
+ : header.AudioVersion == FrameHeader::AudioVersionID::MPEG_2 ? " 2"
251
+ : header.AudioVersion == FrameHeader::AudioVersionID::MPEG_2_5
252
+ ? " 2.5"
253
+ : " INVALID" ;
254
+ }
255
+
256
+ const char * getLayerStr () const {
257
+ return header.Layer == FrameHeader::LayerID::LAYER_1 ? " 1"
258
+ : header.Layer == FrameHeader::LayerID::LAYER_2 ? " 2"
259
+ : header.Layer == FrameHeader::LayerID::LAYER_3 ? " 3"
260
+ : " INVALID" ;
261
+ }
262
+
263
+ // provides the parsed MP3 frame header
264
+ FrameHeader getFrameHeader () { return header; }
265
+
266
/// Finds the mp3/aac sync word: provides the index of the first byte pair
/// where all bits of SYNCWORDH are set in the first byte and all bits of
/// SYNCWORDL are set in the second byte, or -1 if there is no such pair.
int findSyncWord(uint8_t* buf, int nBytes, uint8_t SYNCWORDH = 0xFF,
                 uint8_t SYNCWORDL = 0xF0) {
  int idx = 0;
  // a match needs 2 bytes, so the last byte can not start a sync word
  while (idx + 1 < nBytes) {
    const bool high_matches = (buf[idx] & SYNCWORDH) == SYNCWORDH;
    if (high_matches && (buf[idx + 1] & SYNCWORDL) == SYNCWORDL) {
      return idx;
    }
    ++idx;
  }
  return -1;
}
276
+
277
+ protected:
278
+ FrameHeader header;
279
+
280
/// Calculates a 16 bit crc over length bytes starting at data_p. The crc
/// starts with 0xFFFF and is updated byte by byte.
uint16_t crc16(const uint8_t* data_p, size_t length) {
  uint16_t crc = 0xFFFF;

  for (size_t idx = 0; idx < length; ++idx) {
    // combine the high byte of the crc with the next data byte
    uint8_t mix = static_cast<uint8_t>((crc >> 8) ^ data_p[idx]);
    mix = static_cast<uint8_t>(mix ^ (mix >> 4));

    const uint16_t shifted_crc = static_cast<uint16_t>(crc << 8);
    const uint16_t term12 = static_cast<uint16_t>(mix << 12);
    const uint16_t term5 = static_cast<uint16_t>(mix << 5);
    crc = static_cast<uint16_t>(shifted_crc ^ term12 ^ term5 ^ mix);
  }
  return crc;
}
292
+
293
+ // Seeks to the byte at the end of the next continuous run of 11 set bits.
294
+ // (ie. after seeking the cursor will be on the byte of which its 3 most
295
+ // significant bits are part of the frame sync)
296
+ int seekFrameSync (StrView str) {
297
+ char cur;
298
+ for (int j = 0 ; j < str.length () - 1 ; j++) {
299
+ cur = str[j];
300
+ // read bytes until EOF or a byte with all bits set is encountered
301
+ if ((cur & 0b11111111 ) != 0b11111111 ) continue ;
302
+
303
+ // peek next byte, ensure its not past EOF, and check that its 3 most
304
+ // significant bits are set to complete the continuous run of 11
305
+ char next = str[j + 1 ];
306
+
307
+ if ((next & 0b11100000 ) != 0b11100000 ) {
308
+ // if the next byte does not have its 3 most significant bits set it is
309
+ // not the end of the framesync and it also cannot be the start of a
310
+ // framesync so just skip over it here without the check
311
+ continue ;
312
+ }
313
+ return j;
314
+ }
315
+
316
+ return -1 ;
317
+ }
318
+
319
+ FrameHeader readFrameHeader (StrView in) {
320
+ FrameHeader header;
321
+ memcpy (&header, in.c_str (), sizeof (header));
322
+ return header;
323
+ }
324
+
325
/// Result of validateFrameHeader(): VALID or the reason for the rejection
enum class FrameReason {
  /// no problem was found
  VALID,
  /// the bitrate lookup reported an invalid bitrate
  INVALID_BITRATE_FOR_VERSION,
  /// the sample rate lookup reported a reserved sample rate
  INVALID_SAMPLERATE_FOR_VERSION,
  /// reserved MPEG version id
  INVALID_MPEG_VERSION,
  /// reserved layer id
  INVALID_LAYER,
  /// bitrate which is not allowed for the channel mode in Layer II
  INVALID_LAYER_II_BITRATE_AND_MODE,
  /// reserved emphasis id
  INVALID_EMPHASIS,
  /// the crc of the header does not match the expected crc
  INVALID_CRC,
};
335
+
336
+ FrameReason validateFrameHeader (const FrameHeader& header, uint16_t crc) {
337
+ if (header.Protection ) {
338
+ if (header.crc != crc) {
339
+ LOGI (" invalid CRC" );
340
+ return FrameReason::INVALID_CRC;
341
+ }
342
+ }
343
+
344
+ if (header.AudioVersion == FrameHeader::AudioVersionID::INVALID) {
345
+ LOGI (" invalid mpeg version" );
346
+ return FrameReason::INVALID_MPEG_VERSION;
347
+ }
348
+
349
+ if (header.Layer == FrameHeader::LayerID::INVALID) {
350
+ LOGI (" invalid layer" );
351
+ return FrameReason::INVALID_LAYER;
352
+ }
353
+
354
+ if (header.getBitrate () == FrameHeader::SpecialBitrate::INVALID) {
355
+ LOGI (" invalid bitrate" );
356
+ return FrameReason::INVALID_BITRATE_FOR_VERSION;
357
+ }
358
+
359
+ if (header.getSampleRate () == FrameHeader::SpecialSampleRate::RESERVED) {
360
+ LOGI (" invalid samplerate" );
361
+ return FrameReason::INVALID_SAMPLERATE_FOR_VERSION;
362
+ }
363
+
364
+ // For Layer II there are some combinations of bitrate and mode which are
365
+ // not allowed
366
+ if (header.Layer == FrameHeader::LayerID::LAYER_2) {
367
+ if (header.ChannelMode == FrameHeader::ChannelModeID::SINGLE) {
368
+ if (header.getBitrate () >= 224000 ) {
369
+ LOGI (" invalid bitrate >224000" );
370
+ return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
371
+ }
372
+ } else {
373
+ if (header.getBitrate () >= 32000 && header.getBitrate () <= 56000 ) {
374
+ LOGI (" invalid bitrate >32000" );
375
+ return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
376
+ }
377
+
378
+ if (header.getBitrate () == 80000 ) {
379
+ LOGI (" invalid bitrate >80000" );
380
+ return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
381
+ }
382
+ }
383
+ }
384
+
385
+ if (header.Emphasis == FrameHeader::EmphasisID::INVALID) {
386
+ LOGI (" invalid Emphasis" );
387
+ return FrameReason::INVALID_EMPHASIS;
388
+ }
389
+
390
+ return FrameReason::VALID;
391
+ }
392
+ };
393
+
394
+ } // namespace audio_tools
0 commit comments