Skip to content

Commit 52de298

Browse files
committed
Ignore Settings::libXmlLoaderOptions
Backport of PR #4233.
1 parent 819854a commit 52de298

File tree

12 files changed

+70
-50
lines changed

12 files changed

+70
-50
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com)
66
and this project adheres to [Semantic Versioning](https://semver.org).
77

8+
## 2024-11-22 - 2.1.4
9+
10+
### Changed
11+
12+
- Settings::libXmlLoaderOptions is ignored. Backport of [PR #4233](https://github.com/PHPOffice/PhpSpreadsheet/pull/4233)
13+
14+
### Deprecated
15+
16+
- Settings::setLibXmlLoaderOptions() and Settings::getLibXmlLoaderOptions() are no longer needed - no replacement.
17+
818
## 2024-11-10 - 2.1.3
919

1020
### Fixed

docs/topics/reading-and-writing-to-file.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,6 @@ versions of Microsoft Excel.
298298
**Excel 2003 XML limitations** Please note that Excel 2003 XML format
299299
has some limits regarding styling cells and handling large
300300
spreadsheets via PHP.
301-
Also, only files using charset UTF-8 or ISO-8859-* are supported.
302301

303302
### \PhpOffice\PhpSpreadsheet\Reader\Xml
304303

src/PhpSpreadsheet/Reader/Gnumeric.php

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
1212
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
1313
use PhpOffice\PhpSpreadsheet\RichText\RichText;
14-
use PhpOffice\PhpSpreadsheet\Settings;
1514
use PhpOffice\PhpSpreadsheet\Shared\File;
1615
use PhpOffice\PhpSpreadsheet\Spreadsheet;
1716
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
@@ -104,7 +103,7 @@ public function listWorksheetNames(string $filename): array
104103

105104
$xml = new XMLReader();
106105
$contents = $this->gzfileGetContents($filename);
107-
$xml->xml($contents, null, Settings::getLibXmlLoaderOptions());
106+
$xml->xml($contents);
108107
$xml->setParserProperty(2, true);
109108

110109
$worksheetNames = [];
@@ -133,7 +132,7 @@ public function listWorksheetInfo(string $filename): array
133132

134133
$xml = new XMLReader();
135134
$contents = $this->gzfileGetContents($filename);
136-
$xml->xml($contents, null, Settings::getLibXmlLoaderOptions());
135+
$xml->xml($contents);
137136
$xml->setParserProperty(2, true);
138137

139138
$worksheetInfo = [];
@@ -247,7 +246,7 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
247246

248247
/** @var XmlScanner */
249248
$securityScanner = $this->securityScanner;
250-
$xml2 = simplexml_load_string($securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
249+
$xml2 = simplexml_load_string($securityScanner->scan($gFileData));
251250
$xml = self::testSimpleXml($xml2);
252251

253252
$gnmXML = $xml->children(self::NAMESPACE_GNM);

src/PhpSpreadsheet/Reader/Html.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class Html extends BaseReader
3232

3333
private const STARTS_WITH_BOM = '/^(?:\xfe\xff|\xff\xfe|\xEF\xBB\xBF)/';
3434

35-
private const DECLARES_CHARSET = '/ charset=/i';
35+
private const DECLARES_CHARSET = '/\\bcharset=/i';
3636

3737
/**
3838
* Input encoding.

src/PhpSpreadsheet/Reader/Ods.php

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties;
1717
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
1818
use PhpOffice\PhpSpreadsheet\RichText\RichText;
19-
use PhpOffice\PhpSpreadsheet\Settings;
2019
use PhpOffice\PhpSpreadsheet\Shared\Date;
2120
use PhpOffice\PhpSpreadsheet\Shared\File;
2221
use PhpOffice\PhpSpreadsheet\Spreadsheet;
@@ -57,9 +56,12 @@ public function canRead(string $filename): bool
5756
$mimeType = $zip->getFromName($stat['name']);
5857
} elseif ($zip->statName('META-INF/manifest.xml')) {
5958
$xml = simplexml_load_string(
60-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('META-INF/manifest.xml')),
61-
'SimpleXMLElement',
62-
Settings::getLibXmlLoaderOptions()
59+
$this->getSecurityScannerOrThrow()
60+
->scan(
61+
$zip->getFromName(
62+
'META-INF/manifest.xml'
63+
)
64+
)
6365
);
6466
if ($xml !== false) {
6567
$namespacesContent = $xml->getNamespaces(true);
@@ -97,9 +99,8 @@ public function listWorksheetNames(string $filename): array
9799

98100
$xml = new XMLReader();
99101
$xml->xml(
100-
$this->getSecurityScannerOrThrow()->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE),
101-
null,
102-
Settings::getLibXmlLoaderOptions()
102+
$this->getSecurityScannerOrThrow()
103+
->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE)
103104
);
104105
$xml->setParserProperty(2, true);
105106

@@ -144,9 +145,8 @@ public function listWorksheetInfo(string $filename): array
144145

145146
$xml = new XMLReader();
146147
$xml->xml(
147-
$this->getSecurityScannerOrThrow()->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE),
148-
null,
149-
Settings::getLibXmlLoaderOptions()
148+
$this->getSecurityScannerOrThrow()
149+
->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE)
150150
);
151151
$xml->setParserProperty(2, true);
152152

@@ -252,9 +252,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
252252
// Meta
253253

254254
$xml = @simplexml_load_string(
255-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('meta.xml')),
256-
'SimpleXMLElement',
257-
Settings::getLibXmlLoaderOptions()
255+
$this->getSecurityScannerOrThrow()
256+
->scan($zip->getFromName('meta.xml'))
258257
);
259258
if ($xml === false) {
260259
throw new Exception('Unable to read data from {$pFilename}');
@@ -268,8 +267,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
268267

269268
$dom = new DOMDocument('1.01', 'UTF-8');
270269
$dom->loadXML(
271-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('styles.xml')),
272-
Settings::getLibXmlLoaderOptions()
270+
$this->getSecurityScannerOrThrow()
271+
->scan($zip->getFromName('styles.xml'))
273272
);
274273

275274
$pageSettings = new PageSettings($dom);
@@ -278,8 +277,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
278277

279278
$dom = new DOMDocument('1.01', 'UTF-8');
280279
$dom->loadXML(
281-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName(self::INITIAL_FILE)),
282-
Settings::getLibXmlLoaderOptions()
280+
$this->getSecurityScannerOrThrow()
281+
->scan($zip->getFromName(self::INITIAL_FILE))
283282
);
284283

285284
$officeNs = (string) $dom->lookupNamespaceUri('office');
@@ -655,8 +654,8 @@ private function processSettings(ZipArchive $zip, Spreadsheet $spreadsheet): voi
655654
{
656655
$dom = new DOMDocument('1.01', 'UTF-8');
657656
$dom->loadXML(
658-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('settings.xml')),
659-
Settings::getLibXmlLoaderOptions()
657+
$this->getSecurityScannerOrThrow()
658+
->scan($zip->getFromName('settings.xml'))
660659
);
661660
//$xlinkNs = $dom->lookupNamespaceUri('xlink');
662661
$configNs = (string) $dom->lookupNamespaceUri('config');

src/PhpSpreadsheet/Reader/Xlsx.php

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
use PhpOffice\PhpSpreadsheet\Reader\Xlsx\WorkbookView;
2626
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
2727
use PhpOffice\PhpSpreadsheet\RichText\RichText;
28-
use PhpOffice\PhpSpreadsheet\Settings;
2928
use PhpOffice\PhpSpreadsheet\Shared\Date;
3029
use PhpOffice\PhpSpreadsheet\Shared\Drawing;
3130
use PhpOffice\PhpSpreadsheet\Shared\File;
@@ -120,7 +119,7 @@ private function loadZip(string $filename, string $ns = '', bool $replaceUnclose
120119
$rels = @simplexml_load_string(
121120
$this->getSecurityScannerOrThrow()->scan($contents),
122121
'SimpleXMLElement',
123-
Settings::getLibXmlLoaderOptions(),
122+
0,
124123
$ns
125124
);
126125

@@ -135,7 +134,7 @@ private function loadZipNonamespace(string $filename, string $ns): SimpleXMLElem
135134
$rels = simplexml_load_string(
136135
$this->getSecurityScannerOrThrow()->scan($contents),
137136
'SimpleXMLElement',
138-
Settings::getLibXmlLoaderOptions(),
137+
0,
139138
($ns === '' ? $ns : '')
140139
);
141140

@@ -243,11 +242,13 @@ public function listWorksheetInfo(string $filename): array
243242

244243
$xml = new XMLReader();
245244
$xml->xml(
246-
$this->getSecurityScannerOrThrow()->scan(
247-
$this->getFromZipArchive($this->zip, $fileWorksheetPath)
248-
),
249-
null,
250-
Settings::getLibXmlLoaderOptions()
245+
$this->getSecurityScannerOrThrow()
246+
->scan(
247+
$this->getFromZipArchive(
248+
$this->zip,
249+
$fileWorksheetPath
250+
)
251+
)
251252
);
252253
$xml->setParserProperty(2, true);
253254

@@ -1950,9 +1951,8 @@ private function readRibbon(Spreadsheet $excel, string $customUITarget, ZipArchi
19501951
if ($dataRels) {
19511952
// exists and not empty if the ribbon have some pictures (other than internal MSO)
19521953
$UIRels = simplexml_load_string(
1953-
$this->getSecurityScannerOrThrow()->scan($dataRels),
1954-
'SimpleXMLElement',
1955-
Settings::getLibXmlLoaderOptions()
1954+
$this->getSecurityScannerOrThrow()
1955+
->scan($dataRels)
19561956
);
19571957
if (false !== $UIRels) {
19581958
// we need to save id and target to avoid parsing customUI.xml and "guess" if it's a pseudo callback who load the image

src/PhpSpreadsheet/Reader/Xlsx/Properties.php

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
use PhpOffice\PhpSpreadsheet\Document\Properties as DocumentProperties;
66
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
7-
use PhpOffice\PhpSpreadsheet\Settings;
87
use SimpleXMLElement;
98

109
class Properties
@@ -23,9 +22,7 @@ private function extractPropertyData(string $propertyData): ?SimpleXMLElement
2322
{
2423
// okay to omit namespace because everything will be processed by xpath
2524
$obj = simplexml_load_string(
26-
$this->securityScanner->scan($propertyData),
27-
'SimpleXMLElement',
28-
Settings::getLibXmlLoaderOptions()
25+
$this->securityScanner->scan($propertyData)
2926
);
3027

3128
return $obj === false ? null : $obj;

src/PhpSpreadsheet/Reader/Xml.php

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
use PhpOffice\PhpSpreadsheet\Reader\Xml\Properties;
1616
use PhpOffice\PhpSpreadsheet\Reader\Xml\Style;
1717
use PhpOffice\PhpSpreadsheet\RichText\RichText;
18-
use PhpOffice\PhpSpreadsheet\Settings;
1918
use PhpOffice\PhpSpreadsheet\Shared\Date;
2019
use PhpOffice\PhpSpreadsheet\Shared\File;
2120
use PhpOffice\PhpSpreadsheet\Spreadsheet;
@@ -134,9 +133,8 @@ private function trySimpleXMLLoadStringPrivate(string $filename, string $fileOrS
134133
}
135134
if ($continue) {
136135
$xml = @simplexml_load_string(
137-
$this->getSecurityScannerOrThrow()->scan($data),
138-
'SimpleXMLElement',
139-
Settings::getLibXmlLoaderOptions()
136+
$this->getSecurityScannerOrThrow()
137+
->scan($data)
140138
);
141139
}
142140
} catch (Throwable $e) {

src/PhpSpreadsheet/Settings.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ public static function htmlEntityFlags(): int
9696
* Set default options for libxml loader.
9797
*
9898
* @param ?int $options Default options for libxml loader
99+
*
100+
* @deprecated 2.1.4 no longer needed
99101
*/
100102
public static function setLibXmlLoaderOptions(?int $options): int
101103
{
@@ -112,14 +114,12 @@ public static function setLibXmlLoaderOptions(?int $options): int
112114
* Defaults to LIBXML_DTDLOAD | LIBXML_DTDATTR when not set explicitly.
113115
*
114116
* @return int Default options for libxml loader
117+
*
118+
* @deprecated 2.1.4 no longer needed
115119
*/
116120
public static function getLibXmlLoaderOptions(): int
117121
{
118-
if (self::$libXmlLoaderOptions === null) {
119-
return self::setLibXmlLoaderOptions(null);
120-
}
121-
122-
return self::$libXmlLoaderOptions;
122+
return self::$libXmlLoaderOptions ?? (defined('LIBXML_DTDLOAD') ? (LIBXML_DTDLOAD | LIBXML_DTDATTR) : 0);
123123
}
124124

125125
/**

tests/PhpSpreadsheetTests/Reader/Html/HtmlCharsetTest.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ public static function providerCharset(): array
4040
['charset.UTF-16.lebom.html', 'À1'],
4141
['charset.gb18030.html', '电视机'],
4242
['charset.unknown.html', 'exception'],
43+
['xhtml4.entity.xhtml', 'exception'],
4344
];
4445
}
4546
}

0 commit comments

Comments
 (0)