Skip to content

Commit 91a4b42

Browse files
committed
Ignore Settings::libXmlLoaderOptions
Backport of PR #4233.
1 parent 570bb6e commit 91a4b42

File tree

12 files changed

+69
-50
lines changed

12 files changed

+69
-50
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com)
66
and this project adheres to [Semantic Versioning](https://semver.org).
77

8+
## 2024-11-22 - 2.3.3
9+
10+
### Changed
11+
12+
- Settings::libXmlLoaderOptions is ignored. Backport of [PR #4233](https://github.com/PHPOffice/PhpSpreadsheet/pull/4233)
13+
14+
### Deprecated
15+
16+
- Settings::setLibXmlLoaderOptions() and Settings::getLibXmlLoaderOptions() are no longer needed; there is no replacement.
17+
818
## 2024-11-10 - 2.3.2
919

1020
### Fixed

docs/topics/reading-and-writing-to-file.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,6 @@ versions of Microsoft Excel.
298298
**Excel 2003 XML limitations** Please note that Excel 2003 XML format
299299
has some limits regarding styling cells and handling large
300300
spreadsheets via PHP.
301-
Also, only files using charset UTF-8 or ISO-8859-* are supported.
302301

303302
### \PhpOffice\PhpSpreadsheet\Reader\Xml
304303

src/PhpSpreadsheet/Reader/Gnumeric.php

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
1212
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
1313
use PhpOffice\PhpSpreadsheet\RichText\RichText;
14-
use PhpOffice\PhpSpreadsheet\Settings;
1514
use PhpOffice\PhpSpreadsheet\Shared\File;
1615
use PhpOffice\PhpSpreadsheet\Spreadsheet;
1716
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
@@ -104,7 +103,7 @@ public function listWorksheetNames(string $filename): array
104103

105104
$xml = new XMLReader();
106105
$contents = $this->gzfileGetContents($filename);
107-
$xml->xml($contents, null, Settings::getLibXmlLoaderOptions());
106+
$xml->xml($contents);
108107
$xml->setParserProperty(2, true);
109108

110109
$worksheetNames = [];
@@ -133,7 +132,7 @@ public function listWorksheetInfo(string $filename): array
133132

134133
$xml = new XMLReader();
135134
$contents = $this->gzfileGetContents($filename);
136-
$xml->xml($contents, null, Settings::getLibXmlLoaderOptions());
135+
$xml->xml($contents);
137136
$xml->setParserProperty(2, true);
138137

139138
$worksheetInfo = [];
@@ -247,7 +246,7 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
247246

248247
/** @var XmlScanner */
249248
$securityScanner = $this->securityScanner;
250-
$xml2 = simplexml_load_string($securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
249+
$xml2 = simplexml_load_string($securityScanner->scan($gFileData));
251250
$xml = self::testSimpleXml($xml2);
252251

253252
$gnmXML = $xml->children(self::NAMESPACE_GNM);

src/PhpSpreadsheet/Reader/Html.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class Html extends BaseReader
3434

3535
private const STARTS_WITH_BOM = '/^(?:\xfe\xff|\xff\xfe|\xEF\xBB\xBF)/';
3636

37-
private const DECLARES_CHARSET = '/ charset=/i';
37+
private const DECLARES_CHARSET = '/\\bcharset=/i';
3838

3939
/**
4040
* Input encoding.

src/PhpSpreadsheet/Reader/Ods.php

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties;
1818
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
1919
use PhpOffice\PhpSpreadsheet\RichText\RichText;
20-
use PhpOffice\PhpSpreadsheet\Settings;
2120
use PhpOffice\PhpSpreadsheet\Shared\Date;
2221
use PhpOffice\PhpSpreadsheet\Shared\File;
2322
use PhpOffice\PhpSpreadsheet\Spreadsheet;
@@ -58,9 +57,12 @@ public function canRead(string $filename): bool
5857
$mimeType = $zip->getFromName($stat['name']);
5958
} elseif ($zip->statName('META-INF/manifest.xml')) {
6059
$xml = simplexml_load_string(
61-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('META-INF/manifest.xml')),
62-
'SimpleXMLElement',
63-
Settings::getLibXmlLoaderOptions()
60+
$this->getSecurityScannerOrThrow()
61+
->scan(
62+
$zip->getFromName(
63+
'META-INF/manifest.xml'
64+
)
65+
)
6466
);
6567
if ($xml !== false) {
6668
$namespacesContent = $xml->getNamespaces(true);
@@ -98,9 +100,8 @@ public function listWorksheetNames(string $filename): array
98100

99101
$xml = new XMLReader();
100102
$xml->xml(
101-
$this->getSecurityScannerOrThrow()->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE),
102-
null,
103-
Settings::getLibXmlLoaderOptions()
103+
$this->getSecurityScannerOrThrow()
104+
->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE)
104105
);
105106
$xml->setParserProperty(2, true);
106107

@@ -145,9 +146,8 @@ public function listWorksheetInfo(string $filename): array
145146

146147
$xml = new XMLReader();
147148
$xml->xml(
148-
$this->getSecurityScannerOrThrow()->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE),
149-
null,
150-
Settings::getLibXmlLoaderOptions()
149+
$this->getSecurityScannerOrThrow()
150+
->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE)
151151
);
152152
$xml->setParserProperty(2, true);
153153

@@ -253,9 +253,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
253253
// Meta
254254

255255
$xml = @simplexml_load_string(
256-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('meta.xml')),
257-
'SimpleXMLElement',
258-
Settings::getLibXmlLoaderOptions()
256+
$this->getSecurityScannerOrThrow()
257+
->scan($zip->getFromName('meta.xml'))
259258
);
260259
if ($xml === false) {
261260
throw new Exception('Unable to read data from {$pFilename}');
@@ -269,8 +268,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
269268

270269
$dom = new DOMDocument('1.01', 'UTF-8');
271270
$dom->loadXML(
272-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('styles.xml')),
273-
Settings::getLibXmlLoaderOptions()
271+
$this->getSecurityScannerOrThrow()
272+
->scan($zip->getFromName('styles.xml'))
274273
);
275274

276275
$pageSettings = new PageSettings($dom);
@@ -279,8 +278,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
279278

280279
$dom = new DOMDocument('1.01', 'UTF-8');
281280
$dom->loadXML(
282-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName(self::INITIAL_FILE)),
283-
Settings::getLibXmlLoaderOptions()
281+
$this->getSecurityScannerOrThrow()
282+
->scan($zip->getFromName(self::INITIAL_FILE))
284283
);
285284

286285
$officeNs = (string) $dom->lookupNamespaceUri('office');
@@ -670,8 +669,8 @@ private function processSettings(ZipArchive $zip, Spreadsheet $spreadsheet): voi
670669
{
671670
$dom = new DOMDocument('1.01', 'UTF-8');
672671
$dom->loadXML(
673-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('settings.xml')),
674-
Settings::getLibXmlLoaderOptions()
672+
$this->getSecurityScannerOrThrow()
673+
->scan($zip->getFromName('settings.xml'))
675674
);
676675
//$xlinkNs = $dom->lookupNamespaceUri('xlink');
677676
$configNs = (string) $dom->lookupNamespaceUri('config');

src/PhpSpreadsheet/Reader/Xlsx.php

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
use PhpOffice\PhpSpreadsheet\Reader\Xlsx\WorkbookView;
2727
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
2828
use PhpOffice\PhpSpreadsheet\RichText\RichText;
29-
use PhpOffice\PhpSpreadsheet\Settings;
3029
use PhpOffice\PhpSpreadsheet\Shared\Date;
3130
use PhpOffice\PhpSpreadsheet\Shared\Drawing;
3231
use PhpOffice\PhpSpreadsheet\Shared\File;
@@ -121,7 +120,7 @@ private function loadZip(string $filename, string $ns = '', bool $replaceUnclose
121120
$rels = @simplexml_load_string(
122121
$this->getSecurityScannerOrThrow()->scan($contents),
123122
'SimpleXMLElement',
124-
Settings::getLibXmlLoaderOptions(),
123+
0,
125124
$ns
126125
);
127126

@@ -136,7 +135,7 @@ private function loadZipNonamespace(string $filename, string $ns): SimpleXMLElem
136135
$rels = simplexml_load_string(
137136
$this->getSecurityScannerOrThrow()->scan($contents),
138137
'SimpleXMLElement',
139-
Settings::getLibXmlLoaderOptions(),
138+
0,
140139
($ns === '' ? $ns : '')
141140
);
142141

@@ -244,11 +243,13 @@ public function listWorksheetInfo(string $filename): array
244243

245244
$xml = new XMLReader();
246245
$xml->xml(
247-
$this->getSecurityScannerOrThrow()->scan(
248-
$this->getFromZipArchive($this->zip, $fileWorksheetPath)
249-
),
250-
null,
251-
Settings::getLibXmlLoaderOptions()
246+
$this->getSecurityScannerOrThrow()
247+
->scan(
248+
$this->getFromZipArchive(
249+
$this->zip,
250+
$fileWorksheetPath
251+
)
252+
)
252253
);
253254
$xml->setParserProperty(2, true);
254255

@@ -1966,9 +1967,8 @@ private function readRibbon(Spreadsheet $excel, string $customUITarget, ZipArchi
19661967
if ($dataRels) {
19671968
// exists and not empty if the ribbon have some pictures (other than internal MSO)
19681969
$UIRels = simplexml_load_string(
1969-
$this->getSecurityScannerOrThrow()->scan($dataRels),
1970-
'SimpleXMLElement',
1971-
Settings::getLibXmlLoaderOptions()
1970+
$this->getSecurityScannerOrThrow()
1971+
->scan($dataRels)
19721972
);
19731973
if (false !== $UIRels) {
19741974
// we need to save id and target to avoid parsing customUI.xml and "guess" if it's a pseudo callback who load the image

src/PhpSpreadsheet/Reader/Xlsx/Properties.php

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
use PhpOffice\PhpSpreadsheet\Document\Properties as DocumentProperties;
66
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
7-
use PhpOffice\PhpSpreadsheet\Settings;
87
use SimpleXMLElement;
98

109
class Properties
@@ -23,9 +22,7 @@ private function extractPropertyData(string $propertyData): ?SimpleXMLElement
2322
{
2423
// okay to omit namespace because everything will be processed by xpath
2524
$obj = simplexml_load_string(
26-
$this->securityScanner->scan($propertyData),
27-
'SimpleXMLElement',
28-
Settings::getLibXmlLoaderOptions()
25+
$this->securityScanner->scan($propertyData)
2926
);
3027

3128
return $obj === false ? null : $obj;

src/PhpSpreadsheet/Reader/Xml.php

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
use PhpOffice\PhpSpreadsheet\Reader\Xml\Properties;
1616
use PhpOffice\PhpSpreadsheet\Reader\Xml\Style;
1717
use PhpOffice\PhpSpreadsheet\RichText\RichText;
18-
use PhpOffice\PhpSpreadsheet\Settings;
1918
use PhpOffice\PhpSpreadsheet\Shared\Date;
2019
use PhpOffice\PhpSpreadsheet\Shared\File;
2120
use PhpOffice\PhpSpreadsheet\Spreadsheet;
@@ -133,9 +132,7 @@ private function trySimpleXMLLoadStringPrivate(string $filename, string $fileOrS
133132
}
134133
if ($continue) {
135134
$xml = @simplexml_load_string(
136-
$this->getSecurityScannerOrThrow()->scan($data),
137-
'SimpleXMLElement',
138-
Settings::getLibXmlLoaderOptions()
135+
$this->getSecurityScannerOrThrow()->scan($data)
139136
);
140137
}
141138
} catch (Throwable $e) {

src/PhpSpreadsheet/Settings.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ public static function htmlEntityFlags(): int
9494
* Set default options for libxml loader.
9595
*
9696
* @param ?int $options Default options for libxml loader
97+
*
98+
* @deprecated 3.5.0 no longer needed
9799
*/
98100
public static function setLibXmlLoaderOptions(?int $options): int
99101
{
@@ -110,14 +112,12 @@ public static function setLibXmlLoaderOptions(?int $options): int
110112
* Defaults to LIBXML_DTDLOAD | LIBXML_DTDATTR when not set explicitly.
111113
*
112114
* @return int Default options for libxml loader
115+
*
116+
* @deprecated 3.5.0 no longer needed
113117
*/
114118
public static function getLibXmlLoaderOptions(): int
115119
{
116-
if (self::$libXmlLoaderOptions === null) {
117-
return self::setLibXmlLoaderOptions(null);
118-
}
119-
120-
return self::$libXmlLoaderOptions;
120+
return self::$libXmlLoaderOptions ?? (defined('LIBXML_DTDLOAD') ? (LIBXML_DTDLOAD | LIBXML_DTDATTR) : 0);
121121
}
122122

123123
/**

tests/PhpSpreadsheetTests/Reader/Html/HtmlCharsetTest.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ public static function providerCharset(): array
4040
['charset.UTF-16.lebom.html', 'À1'],
4141
['charset.gb18030.html', '电视机'],
4242
['charset.unknown.html', 'exception'],
43+
['xhtml4.entity.xhtml', 'exception'],
4344
];
4445
}
4546
}

0 commit comments

Comments
 (0)