Skip to content

Commit e2f6863

Browse files
committed
MIME: Binary sniffs
1 parent 75854fd commit e2f6863

File tree

1 file changed

+287
-3
lines changed

1 file changed

+287
-3
lines changed

src/wp-includes/class-wp-mime-type.php

Lines changed: 287 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
* @since {WP_VERSION}
2727
*/
2828
class WP_Mime_Type {
29+
/*
 * Bytes treated as "binary data" when sniffing: 0x00-0x08, 0x0B, 0x0E-0x1A, and 0x1C-0x1F.
 *
 * `from_binary()` scans the resource header for any of these bytes once no known
 * signature has matched; finding one yields `application/octet-stream` instead of `text/plain`.
 */
const BINARY_BYTES = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1C\x1D\x1E\x1F";
30+
2931
const TOKEN_CODE_POINTS = "!#$%&'*+-.0123456789^_`ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz|~";
3032

3133
/*
@@ -73,6 +75,16 @@ class WP_Mime_Type {
7375
*/
7476
protected $detected_apache_bug = false;
7577

78+
/**
 * Builds a MIME type record from its parts.
 *
 * The type and subtype are stored in ASCII lowercase. Parameters, when
 * supplied, are stored exactly as given.
 *
 * @param string     $type       Top-level type, e.g. "text".
 * @param string     $subtype    Subtype, e.g. "html".
 * @param array|null $parameters Optional. Parameters to attach to the type. Default null.
 */
public function __construct( string $type, string $subtype, ?array $parameters = null ) {
	$this->type    = strtolower( $type );
	$this->subtype = strtolower( $subtype );

	// Nothing was supplied: leave the parameters property untouched.
	if ( null === $parameters ) {
		return;
	}

	/** @todo Validate parameters */
	$this->parameters = $parameters;
}
87+
7688
/**
7789
* @todo Rename to `WP_Mime_Type::sniff()`?
7890
*/
@@ -116,9 +128,7 @@ public static function from_string( string $supplied_type ): ?self {
116128
}
117129

118130
// 10. Let mimeType be a new MIME type record whose type is type, in ASCII lowercase, and subtype is subtype, in ASCII lowercase.
119-
$self = new self();
120-
$self->type = strtolower( $type );
121-
$self->subtype = strtolower( $subtype );
131+
$self = new self( $type, $subtype );
122132

123133
// 11. While position is not past the end of input:
124134
$position = $subtype_start + $subtype_length;
@@ -213,6 +223,241 @@ public static function from_string( string $supplied_type ): ?self {
213223
return $self;
214224
}
215225

226+
/**
 * Sniffs a MIME type from the leading bytes of a resource.
 *
 * Checks, in order: PDF, XML, HTML tag openers, PostScript, Unicode byte-order
 * marks, then the image, audio/video, and archive signature helpers. When nothing
 * matches, the header is reported as `text/plain` if it contains none of
 * {@see self::BINARY_BYTES}, and as `application/octet-stream` otherwise.
 *
 * @see https://mimesniff.spec.whatwg.org/#identifying-a-resource-with-an-unknown-mime-type
 *
 * @param string $resource_header Leading bytes of the resource. Only the first 1445 bytes are inspected.
 * @return self|null Sniffed MIME type.
 */
public static function from_binary( string $resource_header ): ?self {
	// The resource header is capped at 1445 bytes; anything beyond is ignored.
	if ( strlen( $resource_header ) > 1445 ) {
		$resource_header = substr( $resource_header, 0, 1445 );
	}

	$length = strlen( $resource_header );

	if ( str_starts_with( $resource_header, '%PDF-' ) ) {
		return new self( 'application', 'pdf' );
	}

	// XML and HTML signatures may be preceded by any amount of whitespace.
	$leading_ws = strspn( $resource_header, " \t\f\r\n" );

	if ( '<?xml' === substr( $resource_header, $leading_ws, 5 ) ) {
		return new self( 'text', 'xml' );
	}

	// Tag names which, directly after "<" and followed by a space or ">", identify HTML.
	$html_prefixes = array(
		'!DOCTYPE HTML',
		'HTML',
		'HEAD',
		'SCRIPT',
		'IFRAME',
		'H1',
		'DIV',
		'FONT',
		'TABLE',
		'A',
		'STYLE',
		'TITLE',
		'B',
		'BODY',
		'BR',
		'P',
	);

	if ( $length > $leading_ws && '<' === $resource_header[ $leading_ws ] ) {
		$prefix_start = $leading_ws + 1;

		// "<!--" is the only HTML signature that needs no tag-terminating byte.
		if ( '!--' === substr( $resource_header, $prefix_start, 3 ) ) {
			return new self( 'text', 'html' );
		}

		foreach ( $html_prefixes as $prefix ) {
			$prefix_length = strlen( $prefix );
			$prefix_end    = $prefix_start + $prefix_length;

			// The byte after the tag name must exist, since it is inspected below.
			if ( $length <= $prefix_end ) {
				continue;
			}

			// Tag names are matched ASCII case-insensitively.
			if ( 0 !== substr_compare( $resource_header, $prefix, $prefix_start, $prefix_length, true ) ) {
				continue;
			}

			$terminator = $resource_header[ $prefix_end ];
			if ( ' ' === $terminator || '>' === $terminator ) {
				return new self( 'text', 'html' );
			}
		}
	}

	if ( str_starts_with( $resource_header, '%!PS-Adobe-' ) ) {
		return new self( 'application', 'postscript' );
	}

	// A byte-order mark only counts when followed by content, hence the minimum length.
	if (
		$length >= 4 &&
		(
			str_starts_with( $resource_header, "\xFE\xFF" ) || // UTF-16BE BOM
			str_starts_with( $resource_header, "\xFF\xFE" ) || // UTF-16LE BOM
			str_starts_with( $resource_header, "\xEF\xBB\xBF" ) // UTF-8 BOM
		)
	) {
		return new self( 'text', 'plain' );
	}

	$sniffed_type = self::sniff_image_binary( $resource_header );
	if ( isset( $sniffed_type ) ) {
		return $sniffed_type;
	}

	$sniffed_type = self::sniff_audio_video_binary( $resource_header );
	if ( isset( $sniffed_type ) ) {
		return $sniffed_type;
	}

	$sniffed_type = self::sniff_archive_binary( $resource_header );
	if ( isset( $sniffed_type ) ) {
		return $sniffed_type;
	}

	// No signature matched: the presence of any binary data byte decides text vs. binary.
	$nonbinary_length = strcspn( $resource_header, self::BINARY_BYTES );
	return $length === $nonbinary_length
		? new self( 'text', 'plain' )
		: new self( 'application', 'octet-stream' );
}
315+
316+
/**
 * Matches the resource header against known image signatures.
 *
 * @see https://mimesniff.spec.whatwg.org/#matching-an-image-type-pattern
 *
 * @param string $resource_header Leading bytes of the resource.
 * @return self|null Image MIME type, or null when no image signature matches.
 */
private static function sniff_image_binary( string $resource_header ): ?self {
	$image_byte_patterns = array(
		array( "\x00\x00\x01\x00", 'image', 'x-icon' ), // Windows Icon
		array( "\x00\x00\x02\x00", 'image', 'x-icon' ), // Windows Cursor
		array( 'BM', 'image', 'bmp' ), // BMP
		array( 'GIF87a', 'image', 'gif' ), // GIF
		array( 'GIF89a', 'image', 'gif' ), // GIF
		array( "\x89PNG\r\n\x1A\n", 'image', 'png' ), // PNG
		array( "\xFF\xD8\xFF", 'image', 'jpeg' ), // JPEG: the registered subtype is "jpeg", not "jpg".
	);

	foreach ( $image_byte_patterns as $pattern_pair ) {
		list( $prefix, $type, $subtype ) = $pattern_pair;

		if ( str_starts_with( $resource_header, $prefix ) ) {
			return new self( $type, $subtype );
		}
	}

	// WebP: "RIFF", four bytes of chunk size (ignored), then "WEBPVP".
	if (
		strlen( $resource_header ) >= 14 &&
		str_starts_with( $resource_header, 'RIFF' ) &&
		0 === substr_compare( $resource_header, 'WEBPVP', 8, 6, false )
	) {
		return new self( 'image', 'webp' );
	}

	return null;
}
345+
346+
/**
 * Matches the resource header against known audio and video signatures.
 *
 * Fixed prefixes are tried first, then the FORM/RIFF containers, and finally
 * the MP4, WebM, and MP3-without-ID3 matchers, in that order.
 *
 * @param string $resource_header Leading bytes of the resource.
 * @return self|null Audio or video MIME type, or null when nothing matches.
 */
private static function sniff_audio_video_binary( string $resource_header ): ?self {
	// Signature bytes => array( type, subtype ).
	$signatures = array(
		'ID3'                      => array( 'audio', 'mpeg' ),      // ID3v2 MP3
		"OggS\x00"                 => array( 'application', 'ogg' ), // Ogg
		"MThd\x00\x00\x00\x06"     => array( 'audio', 'midi' ),      // MIDI
	);

	foreach ( $signatures as $signature => $essence ) {
		if ( str_starts_with( $resource_header, $signature ) ) {
			return new self( $essence[0], $essence[1] );
		}
	}

	// Container formats: a four-byte tag, four bytes of size, then a four-byte form type.
	if ( strlen( $resource_header ) >= 12 ) {
		$container = substr( $resource_header, 0, 4 );
		$form_type = substr( $resource_header, 8, 4 );

		if ( 'FORM' === $container && 'AIFF' === $form_type ) {
			return new self( 'audio', 'aiff' );
		}

		if ( 'RIFF' === $container ) {
			if ( 'AVI ' === $form_type ) {
				return new self( 'video', 'avi' );
			}

			if ( 'WAVE' === $form_type ) {
				return new self( 'audio', 'wave' );
			}
		}
	}

	// Each matcher is consulted only when the previous one found nothing.
	return self::sniff_mp4_binary( $resource_header )
		?? self::sniff_webm_binary( $resource_header )
		?? self::sniff_mp3_without_id3_binary( $resource_header );
}
398+
399+
/**
 * Checks for the MP4 signature: an `ftyp` box whose major brand or one of
 * whose compatible brands starts with "mp4".
 *
 * @see https://mimesniff.spec.whatwg.org/#signature-for-mp4
 *
 * @param string $resource_header Leading bytes of the resource.
 * @return self|null `video/mp4` on a match, null otherwise.
 */
private static function sniff_mp4_binary( string $resource_header ): ?self {
	$length = strlen( $resource_header );

	if ( $length < 12 ) {
		return null;
	}

	// Big-endian 32-bit box size. `unpack()` numbers unnamed elements from 1, not 0.
	$box_size = unpack( 'N', $resource_header, 0 )[1];

	// The whole box must be present and its size must be a multiple of four.
	if ( $length < $box_size || 0 !== ( $box_size % 4 ) ) {
		return null;
	}

	if ( 0 !== substr_compare( $resource_header, 'ftyp', 4, 4, false ) ) {
		return null;
	}

	// Major brand.
	if ( 0 === substr_compare( $resource_header, 'mp4', 8, 3, false ) ) {
		return new self( 'video', 'mp4' );
	}

	// Compatible brands: four-byte entries starting after the minor version at offset 12.
	$bytes_read = 16;
	while ( $bytes_read < $box_size ) {
		if ( 0 === substr_compare( $resource_header, 'mp4', $bytes_read, 3, false ) ) {
			return new self( 'video', 'mp4' );
		}

		$bytes_read += 4;
	}

	return null;
}
430+
431+
/**
 * Checks for the WebM signature.
 *
 * The signature matcher has not been written yet, so no input is recognized
 * and null is always returned. Reporting "no match" instead of throwing lets
 * callers that fall through to this check continue with their remaining checks.
 *
 * @todo Implement the WebM signature matching algorithm.
 * @see https://mimesniff.spec.whatwg.org/#signature-for-webm
 *
 * @param string $resource_header Leading bytes of the resource.
 * @return self|null Currently always null.
 */
private static function sniff_webm_binary( string $resource_header ): ?self {
	return null;
}
437+
438+
/**
 * Checks for the signature of an MP3 stream that has no ID3 header.
 *
 * The signature matcher has not been written yet, so no input is recognized
 * and null is always returned. Reporting "no match" instead of throwing lets
 * callers that fall through to this check continue with their remaining checks.
 *
 * @todo Implement the MP3-without-ID3 signature matching algorithm.
 * @see https://mimesniff.spec.whatwg.org/#signature-for-mp3-without-id3
 *
 * @param string $resource_header Leading bytes of the resource.
 * @return self|null Currently always null.
 */
private static function sniff_mp3_without_id3_binary( string $resource_header ): ?self {
	return null;
}
444+
445+
/**
 * Matches the resource header against known archive signatures.
 *
 * @see https://mimesniff.spec.whatwg.org/#matching-an-archive-type-pattern
 *
 * @param string $resource_header Leading bytes of the resource.
 * @return self|null Archive MIME type, or null when no archive signature matches.
 */
private static function sniff_archive_binary( string $resource_header ): ?self {
	$archive_prefixes = array(
		array( "\x1F\x8B\x08", 'application', 'x-gzip' ), // GZIP
		array( "PK\x03\x04", 'application', 'zip' ), // ZIP
		array( "Rar!\x1A\x07\x00", 'application', 'x-rar-compressed' ), // RAR 4.x
	);

	foreach ( $archive_prefixes as $prefix_pair ) {
		list( $prefix, $type, $subtype ) = $prefix_pair;

		if ( str_starts_with( $resource_header, $prefix ) ) {
			return new self( $type, $subtype );
		}
	}

	// A nullable return type still requires an explicit return value.
	return null;
}
460+
216461
public function serialize(): string {
217462
$serialization = $this->essence();
218463

@@ -284,6 +529,13 @@ public function is_html(): bool {
284529
return 'text' === $this->type && 'html' === $this->subtype;
285530
}
286531

532+
/**
 * Indicates whether this is an HTML-like type: either whatever `is_html()`
 * accepts, or `application/xhtml+xml`.
 *
 * @return bool
 */
public function is_html_family(): bool {
	if ( $this->is_html() ) {
		return true;
	}

	return 'application' === $this->type && 'xhtml+xml' === $this->subtype;
}
538+
287539
/**
 * Indicates whether the top-level type is "image".
 *
 * @return bool
 */
public function is_image(): bool {
	$has_image_type = ( 'image' === $this->type );

	return $has_image_type;
}
@@ -363,4 +615,36 @@ public function is_zip(): bool {
363615
str_ends_with( $this->subtype, '+zip' )
364616
);
365617
}
618+
619+
/**
 * Parses the supplied string and returns the resulting MIME type only when
 * it is a JavaScript media type.
 *
 * @since {WP_VERSION}
 *
 * @param string $supplied_type MIME type string to parse.
 * @return self|null Parsed type, or null when parsing fails or the type is not JavaScript.
 */
public static function sniff_javascript( string $supplied_type ): ?self {
	$parsed = self::from_string( $supplied_type );

	if ( null === $parsed || ! $parsed->is_javascript() ) {
		return null;
	}

	return $parsed;
}
634+
635+
/**
 * Parses the supplied string and returns the resulting MIME type only when
 * it is a JSON media type.
 *
 * @since {WP_VERSION}
 *
 * @param string $supplied_type MIME type string to parse.
 * @return self|null Parsed type, or null when parsing fails or the type is not JSON.
 */
public static function sniff_json( string $supplied_type ): ?self {
	$parsed = self::from_string( $supplied_type );

	if ( null === $parsed || ! $parsed->is_json() ) {
		return null;
	}

	return $parsed;
}
366650
}

0 commit comments

Comments
 (0)