Skip to content

Commit 0f8f071

Browse files
author
Mark Baker
authored
WIP: Xxe (#780)
Changes to the XML security scanner to use libxml_disable_entity_loader() when it is cleanly supported and thread-safe, and to handle the UTF-7 charset, which otherwise permits an XXE exploit.
1 parent 3bea6f5 commit 0f8f071

File tree

11 files changed

+229
-147
lines changed

11 files changed

+229
-147
lines changed

composer.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/PhpSpreadsheet/Reader/BaseReader.php

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -221,37 +221,4 @@ protected function openFile($pFilename)
221221
throw new Exception('Could not open file ' . $pFilename . ' for reading.');
222222
}
223223
}
224-
225-
/**
226-
* Scan theXML for use of <!ENTITY to prevent XXE/XEE attacks.
227-
*
228-
* @param string $xml
229-
*
230-
* @throws Exception
231-
*
232-
* @return string
233-
*/
234-
public function securityScan($xml)
235-
{
236-
$pattern = '/\\0?' . implode('\\0?', str_split('<!DOCTYPE')) . '\\0?/';
237-
if (preg_match($pattern, $xml)) {
238-
throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
239-
}
240-
241-
return $xml;
242-
}
243-
244-
/**
245-
* Scan theXML for use of <!ENTITY to prevent XXE/XEE attacks.
246-
*
247-
* @param string $filestream
248-
*
249-
* @throws Exception
250-
*
251-
* @return string
252-
*/
253-
public function securityScanFile($filestream)
254-
{
255-
return $this->securityScan(file_get_contents($filestream));
256-
}
257224
}

src/PhpSpreadsheet/Reader/Gnumeric.php

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
66
use PhpOffice\PhpSpreadsheet\Cell\DataType;
77
use PhpOffice\PhpSpreadsheet\NamedRange;
8+
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
89
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
910
use PhpOffice\PhpSpreadsheet\RichText\RichText;
1011
use PhpOffice\PhpSpreadsheet\Settings;
@@ -30,13 +31,19 @@ class Gnumeric extends BaseReader
3031

3132
private $referenceHelper;
3233

34+
/**
35+
* @var XmlScanner
36+
*/
37+
private $securityScanner;
38+
3339
/**
3440
* Create a new Gnumeric.
3541
*/
3642
public function __construct()
3743
{
3844
$this->readFilter = new DefaultReadFilter();
3945
$this->referenceHelper = ReferenceHelper::getInstance();
46+
$this->securityScanner = new XmlScanner();
4047
}
4148

4249
/**
@@ -77,7 +84,7 @@ public function listWorksheetNames($pFilename)
7784
File::assertFile($pFilename);
7885

7986
$xml = new XMLReader();
80-
$xml->xml($this->securityScanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
87+
$xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
8188
$xml->setParserProperty(2, true);
8289

8390
$worksheetNames = [];
@@ -106,7 +113,7 @@ public function listWorksheetInfo($pFilename)
106113
File::assertFile($pFilename);
107114

108115
$xml = new XMLReader();
109-
$xml->xml($this->securityScanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
116+
$xml->xml($this->securityScanner->scanFile('compress.zlib://' . realpath($pFilename)), null, Settings::getLibXmlLoaderOptions());
110117
$xml->setParserProperty(2, true);
111118

112119
$worksheetInfo = [];
@@ -196,7 +203,7 @@ public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
196203

197204
$gFileData = $this->gzfileGetContents($pFilename);
198205

199-
$xml = simplexml_load_string($this->securityScan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
206+
$xml = simplexml_load_string($this->securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
200207
$namespacesMeta = $xml->getNamespaces(true);
201208

202209
$gnmXML = $xml->children($namespacesMeta['gnm']);

src/PhpSpreadsheet/Reader/Html.php

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
use DOMNode;
88
use DOMText;
99
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
10+
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
1011
use PhpOffice\PhpSpreadsheet\Spreadsheet;
1112
use PhpOffice\PhpSpreadsheet\Style\Border;
1213
use PhpOffice\PhpSpreadsheet\Style\Color;
@@ -16,6 +17,11 @@
1617
/** PhpSpreadsheet root directory */
1718
class Html extends BaseReader
1819
{
20+
/**
21+
* @var XmlScanner
22+
*/
23+
private $securityScanner;
24+
1925
/**
2026
* Sample size to read to determine if it's HTML or not.
2127
*/
@@ -105,6 +111,7 @@ class Html extends BaseReader
105111
public function __construct()
106112
{
107113
$this->readFilter = new DefaultReadFilter();
114+
$this->securityScanner = new XmlScanner('<!ENTITY');
108115
}
109116

110117
/**
@@ -543,7 +550,7 @@ public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
543550
// Create a new DOM object
544551
$dom = new DOMDocument();
545552
// Reload the HTML file into the DOM object
546-
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
553+
$loaded = $dom->loadHTML(mb_convert_encoding($this->securityScanner->scanFile($pFilename), 'HTML-ENTITIES', 'UTF-8'));
547554
if ($loaded === false) {
548555
throw new Exception('Failed to load ' . $pFilename . ' as a DOM Document');
549556
}
@@ -585,23 +592,6 @@ public function setSheetIndex($pValue)
585592
return $this;
586593
}
587594

588-
/**
589-
* Scan theXML for use of <!ENTITY to prevent XXE/XEE attacks.
590-
*
591-
* @param string $xml
592-
*
593-
* @return string
594-
*/
595-
public function securityScan($xml)
596-
{
597-
$pattern = '/\\0?' . implode('\\0?', str_split('<!ENTITY')) . '\\0?/';
598-
if (preg_match($pattern, $xml)) {
599-
throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
600-
}
601-
602-
return $xml;
603-
}
604-
605595
/**
606596
* Apply inline css inline style.
607597
*

src/PhpSpreadsheet/Reader/Ods.php

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
99
use PhpOffice\PhpSpreadsheet\Cell\DataType;
1010
use PhpOffice\PhpSpreadsheet\Document\Properties;
11+
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
1112
use PhpOffice\PhpSpreadsheet\RichText\RichText;
1213
use PhpOffice\PhpSpreadsheet\Settings;
1314
use PhpOffice\PhpSpreadsheet\Shared\Date;
@@ -19,12 +20,18 @@
1920

2021
class Ods extends BaseReader
2122
{
23+
/**
24+
* @var XmlScanner
25+
*/
26+
private $securityScanner;
27+
2228
/**
2329
* Create a new Ods Reader instance.
2430
*/
2531
public function __construct()
2632
{
2733
$this->readFilter = new DefaultReadFilter();
34+
$this->securityScanner = new XmlScanner();
2835
}
2936

3037
/**
@@ -52,7 +59,7 @@ public function canRead($pFilename)
5259
$mimeType = $zip->getFromName($stat['name']);
5360
} elseif ($stat = $zip->statName('META-INF/manifest.xml')) {
5461
$xml = simplexml_load_string(
55-
$this->securityScan($zip->getFromName('META-INF/manifest.xml')),
62+
$this->securityScanner->scan($zip->getFromName('META-INF/manifest.xml')),
5663
'SimpleXMLElement',
5764
Settings::getLibXmlLoaderOptions()
5865
);
@@ -100,7 +107,7 @@ public function listWorksheetNames($pFilename)
100107

101108
$xml = new XMLReader();
102109
$xml->xml(
103-
$this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'),
110+
$this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'),
104111
null,
105112
Settings::getLibXmlLoaderOptions()
106113
);
@@ -154,7 +161,7 @@ public function listWorksheetInfo($pFilename)
154161

155162
$xml = new XMLReader();
156163
$xml->xml(
157-
$this->securityScanFile('zip://' . realpath($pFilename) . '#content.xml'),
164+
$this->securityScanner->scanFile('zip://' . realpath($pFilename) . '#content.xml'),
158165
null,
159166
Settings::getLibXmlLoaderOptions()
160167
);
@@ -267,7 +274,7 @@ public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
267274
// Meta
268275

269276
$xml = simplexml_load_string(
270-
$this->securityScan($zip->getFromName('meta.xml')),
277+
$this->securityScanner->scan($zip->getFromName('meta.xml')),
271278
'SimpleXMLElement',
272279
Settings::getLibXmlLoaderOptions()
273280
);
@@ -367,7 +374,7 @@ public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
367374

368375
$dom = new \DOMDocument('1.01', 'UTF-8');
369376
$dom->loadXML(
370-
$this->securityScan($zip->getFromName('content.xml')),
377+
$this->securityScanner->scan($zip->getFromName('content.xml')),
371378
Settings::getLibXmlLoaderOptions()
372379
);
373380

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
<?php
2+
3+
namespace PhpOffice\PhpSpreadsheet\Reader\Security;
4+
5+
use PhpOffice\PhpSpreadsheet\Reader\Exception;
6+
7+
class XmlScanner
{
    /**
     * Whether libxml_disable_entity_loader() is treated as safe to call on the
     * running PHP version (see identifyLibxmlDisableEntityLoaderAvailability()).
     *
     * @var bool
     */
    private $libxmlDisableEntityLoader = false;

    /**
     * Literal marker (for example '<!DOCTYPE') whose presence makes scan() reject the input.
     *
     * @var string
     */
    private $pattern;

    /**
     * Create a scanner for the given marker.
     *
     * When the version check passes, libxml_disable_entity_loader(true) is called
     * here; the previous setting is not recorded or restored by this class.
     *
     * @param string $pattern Literal text that must not appear in scanned content
     */
    public function __construct($pattern = '<!DOCTYPE')
    {
        $this->pattern = $pattern;
        $this->libxmlDisableEntityLoader = $this->identifyLibxmlDisableEntityLoaderAvailability();

        if ($this->libxmlDisableEntityLoader) {
            libxml_disable_entity_loader(true);
        }
    }

    /**
     * Decide from the PHP version whether libxml_disable_entity_loader() should be used.
     *
     * Accepted versions: 7.0.27+, 7.1.13+, 7.2.1+ and any later 7.x minor release.
     * Every other major version yields false.
     *
     * @return bool
     */
    private function identifyLibxmlDisableEntityLoaderAvailability()
    {
        if (PHP_MAJOR_VERSION !== 7) {
            return false;
        }

        switch (PHP_MINOR_VERSION) {
            case 0:
                return PHP_RELEASE_VERSION >= 27;
            case 1:
                return PHP_RELEASE_VERSION >= 13;
            case 2:
                return PHP_RELEASE_VERSION >= 1;
        }

        return true;
    }

    /**
     * Convert the document to UTF-8 when it declares a different character set.
     *
     * The declaration is matched with either quote style and compared
     * case-insensitively, so a re-encoded payload (e.g. UTF-7) cannot slip past
     * the marker check by writing encoding='utf-7' instead of encoding="UTF-7".
     *
     * @param mixed $xml
     *
     * @throws Exception when the declared character set cannot be converted
     *
     * @return mixed the input unchanged when it is already UTF-8, otherwise the converted string
     */
    private function toUtf8($xml)
    {
        $charset = 'UTF-8';
        if (preg_match('/encoding\s*=\s*(["\'])(.*?)\1/', $xml, $matches)) {
            $charset = strtoupper(trim($matches[2]));
        }

        if ($charset === 'UTF-8') {
            return $xml;
        }

        try {
            // Suppression is deliberate: PHP 7 signals an unsupported charset with a
            // warning and a false return, which is checked explicitly just below.
            $converted = @mb_convert_encoding($xml, 'UTF-8', $charset);
        } catch (\ValueError $e) {
            // PHP 8 raises ValueError for an unsupported charset instead of returning false.
            $converted = false;
        }

        if (!is_string($converted)) {
            throw new Exception('Unable to convert XML from declared encoding "' . $charset . '", spreadsheet file load() aborted to prevent XXE/XEE attacks');
        }

        return $converted;
    }

    /**
     * Scan content for the configured marker to prevent XXE/XEE attacks.
     *
     * Content declaring a non UTF-8 encoding is converted to UTF-8 first. The
     * marker is matched with an optional NUL byte around every character so that
     * 16-bit encoded text is detected as well.
     *
     * @param mixed $xml Content to check; null and false are returned untouched
     *
     * @throws Exception when the marker is found or the content cannot be checked
     *
     * @return string the content, converted to UTF-8 when a conversion was needed
     */
    public function scan($xml)
    {
        // Nothing to inspect, e.g. a failed read upstream; hand the value back as before.
        if ($xml === null || $xml === false) {
            return $xml;
        }

        $xml = $this->toUtf8($xml);

        // Don't rely purely on libxml_disable_entity_loader()
        $escaped = array_map(
            function ($character) {
                return preg_quote($character, '/');
            },
            str_split($this->pattern)
        );
        $pattern = '/\\0?' . implode('\\0?', $escaped) . '\\0?/';

        $found = preg_match($pattern, $xml);
        if ($found === false) {
            // A PCRE failure must not be mistaken for "marker absent".
            throw new Exception('Unable to scan XML for security markers, spreadsheet file load() aborted to prevent XXE/XEE attacks');
        }
        if ($found) {
            throw new Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
        }

        return $xml;
    }

    /**
     * Read a file or stream URI and scan its contents with scan().
     *
     * @param string $filestream Path or stream wrapper URI readable by file_get_contents()
     *
     * @throws Exception when the file cannot be read or scan() rejects its contents
     *
     * @return string
     */
    public function scanFile($filestream)
    {
        $contents = file_get_contents($filestream);
        if ($contents === false) {
            throw new Exception('Could not open file ' . $filestream . ' for reading.');
        }

        return $this->scan($contents);
    }
}

0 commit comments

Comments
 (0)