Skip to content

Commit e22b3a3

Browse files
TimWollabukka
authored and committed
Fix GHSA-p3x9-6h7p-cgfc: libxml streams wrong content-type on redirect
libxml streams use wrong content-type header when requesting a redirected resource.
1 parent 9635517 commit e22b3a3

File tree

4 files changed

+195
-5
lines changed

4 files changed

+195
-5
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
--TEST--
2+
GHSA-p3x9-6h7p-cgfc: libxml streams use wrong `content-type` header when requesting a redirected resource (Basic)
3+
--EXTENSIONS--
4+
dom
5+
--SKIPIF--
6+
<?php
7+
if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available');
8+
http_server_skipif();
9+
?>
10+
--FILE--
11+
<?php
12+
require "./ext/standard/tests/http/server.inc";
13+
14+
function genResponses($server) {
15+
$uri = 'http://' . stream_socket_get_name($server, false);
16+
yield "data://text/plain,HTTP/1.1 302 Moved Temporarily\r\nLocation: $uri/document.xml\r\nContent-Type: text/html;charset=utf-16\r\n\r\n";
17+
$xml = <<<'EOT'
18+
<!doctype html>
19+
<html>
20+
<head>
21+
<title>GHSA-p3x9-6h7p-cgfc</title>
22+
23+
<meta charset="utf-8" />
24+
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
25+
</head>
26+
27+
<body>
28+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
29+
</body>
30+
</html>
31+
EOT;
32+
// Intentionally using non-standard casing for content-type to verify that the header name is matched case-insensitively.
33+
yield "data://text/plain,HTTP/1.1 200 OK\r\nconteNt-tyPe: text/html; charset=utf-8\r\n\r\n{$xml}";
34+
}
35+
36+
['pid' => $pid, 'uri' => $uri] = http_server('genResponses', $output);
37+
$document = new \DOMDocument();
38+
$document->loadHTMLFile($uri);
39+
40+
$h1 = $document->getElementsByTagName('h1');
41+
var_dump($h1->length);
42+
var_dump($document->saveHTML());
43+
http_server_kill($pid);
44+
?>
45+
--EXPECT--
46+
int(1)
47+
string(266) "<!DOCTYPE html>
48+
<html>
49+
<head>
50+
<title>GHSA-p3x9-6h7p-cgfc</title>
51+
52+
<meta charset="utf-8">
53+
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
54+
</head>
55+
56+
<body>
57+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
58+
</body>
59+
</html>
60+
"
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
--TEST--
2+
GHSA-p3x9-6h7p-cgfc: libxml streams use wrong `content-type` header when requesting a redirected resource (Missing content-type)
3+
--EXTENSIONS--
4+
dom
5+
--SKIPIF--
6+
<?php
7+
if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available');
8+
http_server_skipif();
9+
?>
10+
--FILE--
11+
<?php
12+
require "./ext/standard/tests/http/server.inc";
13+
14+
function genResponses($server) {
15+
$uri = 'http://' . stream_socket_get_name($server, false);
16+
yield "data://text/plain,HTTP/1.1 302 Moved Temporarily\r\nLocation: $uri/document.xml\r\nContent-Type: text/html;charset=utf-16\r\n\r\n";
17+
$xml = <<<'EOT'
18+
<!doctype html>
19+
<html>
20+
<head>
21+
<title>GHSA-p3x9-6h7p-cgfc</title>
22+
23+
<meta charset="utf-8" />
24+
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
25+
</head>
26+
27+
<body>
28+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
29+
</body>
30+
</html>
31+
EOT;
32+
// Missing content-type in actual response.
33+
yield "data://text/plain,HTTP/1.1 200 OK\r\n\r\n{$xml}";
34+
}
35+
36+
['pid' => $pid, 'uri' => $uri] = http_server('genResponses', $output);
37+
$document = new \DOMDocument();
38+
$document->loadHTMLFile($uri);
39+
40+
$h1 = $document->getElementsByTagName('h1');
41+
var_dump($h1->length);
42+
var_dump($document->saveHTML());
43+
http_server_kill($pid);
44+
?>
45+
--EXPECT--
46+
int(1)
47+
string(266) "<!DOCTYPE html>
48+
<html>
49+
<head>
50+
<title>GHSA-p3x9-6h7p-cgfc</title>
51+
52+
<meta charset="utf-8">
53+
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
54+
</head>
55+
56+
<body>
57+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
58+
</body>
59+
</html>
60+
"
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
--TEST--
2+
GHSA-p3x9-6h7p-cgfc: libxml streams use wrong `content-type` header when requesting a redirected resource (Reason with colon)
3+
--EXTENSIONS--
4+
dom
5+
--SKIPIF--
6+
<?php
7+
if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available');
8+
http_server_skipif();
9+
?>
10+
--FILE--
11+
<?php
12+
require "./ext/standard/tests/http/server.inc";
13+
14+
function genResponses($server) {
15+
$uri = 'http://' . stream_socket_get_name($server, false);
16+
yield "data://text/plain,HTTP/1.1 302 Moved Temporarily\r\nLocation: $uri/document.xml\r\nContent-Type: text/html;charset=utf-16\r\n\r\n";
17+
$xml = <<<'EOT'
18+
<!doctype html>
19+
<html>
20+
<head>
21+
<title>GHSA-p3x9-6h7p-cgfc</title>
22+
23+
<meta charset="utf-8" />
24+
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
25+
</head>
26+
27+
<body>
28+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
29+
</body>
30+
</html>
31+
EOT;
32+
// The status line's reason phrase contains a colon, and the actual response has no content-type header.
33+
yield "data://text/plain,HTTP/1.1 200 OK: This is fine\r\n\r\n{$xml}";
34+
}
35+
36+
['pid' => $pid, 'uri' => $uri] = http_server('genResponses', $output);
37+
$document = new \DOMDocument();
38+
$document->loadHTMLFile($uri);
39+
40+
$h1 = $document->getElementsByTagName('h1');
41+
var_dump($h1->length);
42+
var_dump($document->saveHTML());
43+
http_server_kill($pid);
44+
?>
45+
--EXPECT--
46+
int(1)
47+
string(266) "<!DOCTYPE html>
48+
<html>
49+
<head>
50+
<title>GHSA-p3x9-6h7p-cgfc</title>
51+
52+
<meta charset="utf-8">
53+
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
54+
</head>
55+
56+
<body>
57+
<h1>GHSA-p3x9-6h7p-cgfc</h1>
58+
</body>
59+
</html>
60+
"

ext/libxml/mime_sniff.c

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -308,11 +308,21 @@ PHP_LIBXML_API zend_string *php_libxml_sniff_charset_from_stream(const php_strea
308308
if (Z_TYPE(s->wrapperdata) == IS_ARRAY) {
309309
zval *header;
310310

311-
ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) {
312-
const char buf[] = "Content-Type:";
313-
if (Z_TYPE_P(header) == IS_STRING &&
314-
!zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) {
315-
return php_libxml_sniff_charset_from_string(Z_STRVAL_P(header) + sizeof(buf) - 1, Z_STRVAL_P(header) + Z_STRLEN_P(header));
311+
/* Scan backwards: The header array might contain the headers for multiple responses, if
312+
* a redirect was followed.
313+
*/
314+
ZEND_HASH_REVERSE_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) {
315+
if (Z_TYPE_P(header) == IS_STRING) {
316+
/* If the header contains no colon, or a space precedes its first colon, we assume it's the HTTP status line and bail out. */
317+
char *colon = memchr(Z_STRVAL_P(header), ':', Z_STRLEN_P(header));
318+
char *space = memchr(Z_STRVAL_P(header), ' ', Z_STRLEN_P(header));
319+
if (colon == NULL || space < colon) {
320+
return NULL;
321+
}
322+
323+
if (zend_string_starts_with_literal_ci(Z_STR_P(header), "content-type:")) {
324+
return php_libxml_sniff_charset_from_string(Z_STRVAL_P(header) + strlen("content-type:"), Z_STRVAL_P(header) + Z_STRLEN_P(header));
325+
}
316326
}
317327
} ZEND_HASH_FOREACH_END();
318328
}

0 commit comments

Comments
 (0)