77 * Copyright(c) 2020 Google Inc.
88 */
99
10- // A Container Format is a file that embeds multiple data streams into a single file.
11- // Examples:
12- // 1) the ISO-BMFF family (MP4, HEVC, AVIF, MOV/QT, etc)
13- // 2) the Matroska family (MKV, WebM)
14- // 3) the RIFF family (WAV, AVI, WebP)
15- // 4) the OGG family (OGV, OPUS)
16- // 5) the ZIP family (ZIP, JAR, CBZ, EPUB, ODF, OOXML)
17-
18- // The ISO-BMFF container needs special processing because of its "compatible brands" array :(
19- // The Matroska container needs special processing because the sub-type can appear anywhere :(
20- // The OGG container needs special processing to determine what kind of streams are present :(
21- // The ZIP container needs special processing to determine what files are present inside it :(
10+ // https://mimesniff.spec.whatwg.org/ is a good resource.
11+ // https://github.com/h2non/filetype is an easy target for reverse-engineering.
2212
2313// NOTE: Because the ICO format also starts with a couple zero bytes, this tree will rely on the
2414// File Type box never going beyond 255 bytes in length, which seems unlikely according to
@@ -72,13 +62,9 @@ const fileSignatures = {
7262 'font/woff2' : [ [ 0x77 , 0x4F , 0x46 , 0x32 ] ] , // 'wOF2'
7363} ;
7464
75- // TODO: Eventually add support for various container formats so that:
76- // * an OGG container can be resolved to OGG Audio, OGG Video
77- // * an HEIF container can be resolved to AVIF, HEIC
78-
7965/**
80- * Represents a single byte in the tree. If this node terminates a known MIME type (see magic
81- * numbers above), then the mimeType field will be set.
66+ * Represents a single byte in the magic number tree. If this node terminates a known MIME type
67+ * (see magic numbers above), then the mimeType field will be set.
8268 */
8369class Node {
8470 /** @type {string } */
@@ -133,9 +119,9 @@ export function initialize() {
133119 }
134120
135121 if ( curNode . mimeType ) {
136- throw `File signature collision: ${ curNode . mimeType } overlaps with ${ mimeType } ` ;
122+ throw `Magic number collision: ${ curNode . mimeType } overlaps with ${ mimeType } ` ;
137123 } else if ( Object . keys ( curNode . children ) . length > 0 ) {
138- throw `${ mimeType } signature is not unique, it collides with other mime types` ;
124+ throw `${ mimeType } magic number is not unique, it collides with other mime types` ;
139125 }
140126 curNode . mimeType = mimeType ;
141127 } // for each signature
@@ -152,23 +138,17 @@ export function findMimeType(ab) {
152138 initialize ( ) ;
153139 }
154140
155- const depth = ab . byteLength < maxDepth ? ab . byteLength : maxDepth ;
156- const arr = new Uint8Array ( ab ) . subarray ( 0 , depth ) ;
141+ const arr = new Uint8Array ( ab ) ;
157142 let curNode = root ;
158143 let mimeType ;
159144 // Step through bytes, updating curNode as it walks down the byte tree.
160145 for ( const byte of arr ) {
161- // If this node has a placeholder child, just step into it.
162- if ( curNode . children [ '??' ] ) {
163- curNode = curNode . children [ '??' ] ;
164- continue ;
165- }
166- if ( curNode . children [ byte ] === undefined ) return undefined ;
167- curNode = curNode . children [ byte ] ;
168- if ( curNode . mimeType ) {
169- mimeType = curNode . mimeType ;
146+ // If we found the mimeType or it is unknown, break the loop.
147+ if ( ! curNode || ( mimeType = curNode . mimeType ) ) {
170148 break ;
171149 }
150+ // Move into the next byte's node (if it exists) or the placeholder node (if it exists).
151+ curNode = curNode . children [ byte ] || curNode . children [ '??' ] ;
172152 }
173153 return mimeType ;
174154}
0 commit comments