Skip to content

Commit 304205a

Browse files
committed
MIME: Introduce MIME type parser
1 parent 2ae6561 commit 304205a

File tree

1 file changed

+245
-0
lines changed

1 file changed

+245
-0
lines changed
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
<?php
2+
3+
/**
 * Parses a MIME type string into its type, subtype and parameters, and
 * classifies the result (font, image, JavaScript, JSON, audio/video).
 *
 * Parsing follows the numbered steps of the WHATWG "parse a MIME type"
 * algorithm; the step numbers are repeated in the inline comments.
 *
 * All offsets are byte offsets. Every delimiter and every code point that the
 * algorithm tests is ASCII, so byte-wise scanning is safe for UTF-8 input.
 *
 * @see https://mimesniff.spec.whatwg.org/#parsing-a-mime-type
 * @see https://www.rfc-editor.org/rfc/rfc2045#section-5.1
 * @see https://www.rfc-editor.org/rfc/rfc9110#name-media-type
 * @see https://www.iana.org/assignments/media-types/media-types.xhtml
 */
class WP_Mime_Type {
	/**
	 * Every byte allowed in an HTTP token: ASCII alphanumerics plus
	 * ! # $ % & ' * + - . ^ _ ` | ~
	 *
	 * Used as a mask for `strspn()`, which treats each byte literally.
	 *
	 * @since {WP_VERSION}
	 */
	const TOKEN_CODE_POINTS = "!#$%&'*+-.0123456789^_`ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz|~";

	/*
	 * > An HTTP quoted-string token code point is U+0009 TAB, a code point in the range U+0020 SPACE to U+007E (~),
	 * > inclusive, or a code point in the range U+0080 through U+00FF (ÿ), inclusive.
	 *
	 * This list includes the inverse set of the above: the C0 controls except
	 * TAB (0x09), plus DEL (0x7F). Bytes 0x80 and above are not listed, so
	 * they are always accepted.
	 *
	 * @since {WP_VERSION}
	 */
	const QUOTED_STRING_FORBIDDEN = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x7F";

	/**
	 * Top-level type, ASCII-lowercased (the part before the slash).
	 *
	 * @since {WP_VERSION}
	 *
	 * @var string
	 */
	protected $type;

	/**
	 * Subtype, ASCII-lowercased (the part after the slash, before any parameters).
	 *
	 * @since {WP_VERSION}
	 *
	 * @var string
	 */
	protected $subtype;

	/**
	 * Parsed parameters, keyed by lowercased parameter name.
	 *
	 * Initialised to an empty array so that a MIME type without parameters
	 * still exposes an array rather than null.
	 *
	 * @since {WP_VERSION}
	 *
	 * @var Array<string, string>
	 */
	protected $parameters = array();

	/**
	 * Parses a MIME type string.
	 *
	 * @since {WP_VERSION}
	 *
	 * @param string $input Raw MIME type, e.g. `text/html; charset="utf-8"`.
	 * @return self|null Parsed MIME type, or null when the type or subtype is
	 *                   missing or contains a byte that is not an HTTP token
	 *                   code point. Malformed parameters are skipped and do
	 *                   not cause failure.
	 */
	public static function from_string( string $input ): ?self {
		// 1. Remove any leading and trailing HTTP whitespace from input.
		$input = trim( $input, " \t\r\n" );

		// 2. Let position be a position variable for input, initially pointing at the start of input.
		$end = strlen( $input );

		// 3. Let type be the result of collecting a sequence of code points that are not U+002F (/) from input, given position.
		$type_length = strcspn( $input, '/' );
		$type        = substr( $input, 0, $type_length );

		// 4. If type is the empty string or does not solely contain HTTP token code points, then return failure.
		// 5. If position is past the end of the input, then return failure.
		if (
			'' === $type ||
			$type_length >= $end ||
			strspn( $type, self::TOKEN_CODE_POINTS ) !== $type_length
		) {
			return null;
		}

		// 6. Advance position by 1. (This skips past U+002F (/).)
		$position = $type_length + 1;

		// 7. Let subtype be the result of collecting a sequence of code points that are not U+003B (;) from input, given position.
		$subtype_length = strcspn( $input, ';', $position );

		// 8. Remove any trailing HTTP whitespace from subtype.
		$subtype = rtrim( substr( $input, $position, $subtype_length ), " \t\r\n" );

		/*
		 * 9. If subtype is the empty string or does not solely contain HTTP token code points, then return failure.
		 *
		 * The comparison uses the length of the trimmed subtype. Comparing
		 * against the untrimmed length would reject valid input such as
		 * "text/html ;charset=utf-8".
		 */
		if ( '' === $subtype || strspn( $subtype, self::TOKEN_CODE_POINTS ) !== strlen( $subtype ) ) {
			return null;
		}

		// 10. Let mimeType be a new MIME type record whose type is type, in ASCII lowercase, and subtype is subtype, in ASCII lowercase.
		$self          = new self();
		$self->type    = strtolower( $type );
		$self->subtype = strtolower( $subtype );

		// 11. While position is not past the end of input:
		$position += $subtype_length;
		while ( $position < $end ) {
			// 1. Advance position by 1. (This skips past U+003B (;).)
			++$position;

			// 2. Collect a sequence of code points that are HTTP whitespace from input given position.
			$position += strspn( $input, " \t\r\n", $position );

			// 3. Let parameterName be the result of collecting a sequence of code points that are not U+003B (;) or U+003D (=) from input, given position.
			$name_length = strcspn( $input, ';=', $position );

			// 4. Set parameterName to parameterName, in ASCII lowercase.
			$parameter_name = strtolower( substr( $input, $position, $name_length ) );
			$position      += $name_length;

			// 5. If position is not past the end of input, then:
			if ( $position < $end ) {
				// 1. If the code point at position within input is U+003B (;), then continue.
				if ( ';' === $input[ $position ] ) {
					continue;
				}

				// 2. Advance position by 1. (This skips past U+003D (=).)
				++$position;
			}

			// 6. If position is past the end of input, then break.
			if ( $position >= $end ) {
				break;
			}

			// 7. Let parameterValue be null. (Both branches below always assign it.)
			// 8. If the code point at position within input is U+0022 ("), then:
			if ( '"' === $input[ $position ] ) {
				// 1. Set parameterValue to the result of collecting an HTTP quoted string from input, given position and true.
				list( $value, $position ) = self::collect_quoted_string( $input, $position, $end );

				// 2. Collect a sequence of code points that are not U+003B (;) from input, given position.
				$position += strcspn( $input, ';', $position );
			} else { // 9. Otherwise:
				// 1. Set parameterValue to the result of collecting a sequence of code points that are not U+003B (;) from input, given position.
				$value_length = strcspn( $input, ';', $position );

				// 2. Remove any trailing HTTP whitespace from parameterValue.
				$value     = rtrim( substr( $input, $position, $value_length ), " \t\r\n" );
				$position += $value_length;

				// 3. If parameterValue is the empty string, then continue.
				if ( '' === $value ) {
					continue;
				}
			}

			/*
			 * 10. If all of the following are true…then set mimeType’s parameters[parameterName] to parameterValue.
			 *
			 * Lengths are measured on the final name and value, after
			 * trimming and unescaping, so that earlier normalisation cannot
			 * make a valid parameter look invalid. The first occurrence of a
			 * parameter name wins.
			 */
			if (
				'' !== $parameter_name &&
				strspn( $parameter_name, self::TOKEN_CODE_POINTS ) === strlen( $parameter_name ) &&
				strcspn( $value, self::QUOTED_STRING_FORBIDDEN ) === strlen( $value ) &&
				! isset( $self->parameters[ $parameter_name ] )
			) {
				$self->parameters[ $parameter_name ] = $value;
			}
		}

		return $self;
	}

	/**
	 * Collects an HTTP quoted string and returns its unescaped value.
	 *
	 * Scans from the opening quote to the matching unescaped closing quote, or
	 * to the end of input when the string is unterminated. A backslash causes
	 * the byte after it to be taken literally, so `\"` does not end the string.
	 * A backslash that is the last byte of input is kept as a literal backslash.
	 *
	 * @since {WP_VERSION}
	 *
	 * @param string $input    Full input being parsed.
	 * @param int    $position Byte offset of the opening U+0022 (").
	 * @param int    $end      Byte length of `$input`.
	 * @return array {
	 *     @type string $0 Unescaped contents of the quoted string.
	 *     @type int    $1 Byte offset just past the closing quote, or `$end` if unterminated.
	 * }
	 */
	private static function collect_quoted_string( string $input, int $position, int $end ): array {
		$value = '';

		// Skip the opening quote.
		++$position;

		while ( true ) {
			// Copy the run of ordinary bytes up to the next quote or backslash.
			$run_length = strcspn( $input, '"\\', $position );
			$value     .= substr( $input, $position, $run_length );
			$position  += $run_length;

			// Unterminated string: everything collected so far is the value.
			if ( $position >= $end ) {
				break;
			}

			$delimiter = $input[ $position ];
			++$position;

			// An unescaped quote closes the string.
			if ( '\\' !== $delimiter ) {
				break;
			}

			// A trailing backslash has nothing to escape; keep it literally.
			if ( $position >= $end ) {
				$value .= '\\';
				break;
			}

			// Escaped byte: take it verbatim, whatever it is.
			$value .= $input[ $position ];
			++$position;
		}

		return array( $value, $position );
	}

	/**
	 * Returns the essence of the MIME type: `type/subtype` without parameters.
	 *
	 * @since {WP_VERSION}
	 *
	 * @return string
	 */
	public function essence(): string {
		return "{$this->type}/{$this->subtype}";
	}

	/**
	 * Indicates whether this is a font MIME type.
	 *
	 * True for any `font/*` type and for the legacy `application/*` font subtypes.
	 *
	 * @since {WP_VERSION}
	 *
	 * @return bool
	 */
	public function is_font(): bool {
		if ( 'font' === $this->type ) {
			return true;
		}

		return (
			'application' === $this->type &&
			in_array(
				$this->subtype,
				array(
					'font-cff',
					// The WHATWG list spells this one "font-off" (TODO confirm); "font-otf" is kept for compatibility.
					'font-off',
					'font-otf',
					'font-sfnt',
					'font-ttf',
					'font-woff',
					'vnd.ms-fontobject',
					'vnd.ms-opentype',
				),
				true
			)
		);
	}

	/**
	 * Indicates whether the top-level type is `image`.
	 *
	 * @since {WP_VERSION}
	 *
	 * @return bool
	 */
	public function is_image(): bool {
		return 'image' === $this->type;
	}

	/**
	 * Indicates whether the essence is one of the recognised JavaScript MIME types.
	 *
	 * @since {WP_VERSION}
	 *
	 * @return bool
	 */
	public function is_javascript(): bool {
		if ( 'application' === $this->type ) {
			return in_array(
				$this->subtype,
				array(
					'ecmascript',
					'javascript',
					'x-ecmascript',
					'x-javascript',
				),
				true
			);
		}

		if ( 'text' === $this->type ) {
			return in_array(
				$this->subtype,
				array(
					'ecmascript',
					'javascript',
					'javascript1.0',
					'javascript1.1',
					'javascript1.2',
					'javascript1.3',
					'javascript1.4',
					'javascript1.5',
					'jscript',
					'livescript',
					'x-ecmascript',
					'x-javascript',
				),
				true
			);
		}

		return false;
	}

	/**
	 * Indicates whether this is a JSON MIME type.
	 *
	 * True for `application/json`, `text/json`, and any subtype ending in `+json`.
	 *
	 * @since {WP_VERSION}
	 *
	 * @return bool
	 */
	public function is_json(): bool {
		return (
			( 'application' === $this->type && 'json' === $this->subtype ) ||
			( 'text' === $this->type && 'json' === $this->subtype ) ||
			str_ends_with( $this->subtype, '+json' )
		);
	}

	/**
	 * Indicates whether this is an audio or video MIME type.
	 *
	 * True for any `audio/*` or `video/*` type and for `application/ogg`.
	 *
	 * @since {WP_VERSION}
	 *
	 * @return bool
	 */
	public function is_media(): bool {
		return (
			'audio' === $this->type ||
			'video' === $this->type ||
			( 'application' === $this->type && 'ogg' === $this->subtype )
		);
	}
}

0 commit comments

Comments
 (0)