Skip to content

Commit fb74dcd

Browse files
authored
Merge pull request #4233 from oleibman/nodtdload
Ignore Settings::libXmlLoaderOptions
2 parents c01b94b + 343d26a commit fb74dcd

File tree

12 files changed

+67
-52
lines changed

12 files changed

+67
-52
lines changed

CHANGELOG.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,23 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com)
66
and this project adheres to [Semantic Versioning](https://semver.org).
77

8-
## TBD - 3.5.0
8+
## 2024-11-22 - 3.5.0
99

1010
### Added
1111

1212
- Nothing yet.
1313

1414
### Changed
1515

16-
- Nothing yet.
16+
- Settings::libXmlLoaderOptions is ignored. [PR #4233](https://github.com/PHPOffice/PhpSpreadsheet/pull/4233)
1717

1818
### Moved
1919

2020
- Nothing yet.
2121

2222
### Deprecated
2323

24+
- Settings::setLibXmlLoaderOptions() and Settings::getLibXmlLoaderOptions() are no longer needed - no replacement.
2425
- Worksheet::getHashCode is no longer needed.
2526

2627
### Fixed

docs/topics/reading-and-writing-to-file.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,6 @@ versions of Microsoft Excel.
298298
**Excel 2003 XML limitations** Please note that Excel 2003 XML format
299299
has some limits regarding to styling cells and handling large
300300
spreadsheets via PHP.
301-
Also, only files using charset UTF-8 or ISO-8859-* are supported.
302301

303302
### \PhpOffice\PhpSpreadsheet\Reader\Xml
304303

src/PhpSpreadsheet/Reader/Gnumeric.php

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
1212
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
1313
use PhpOffice\PhpSpreadsheet\RichText\RichText;
14-
use PhpOffice\PhpSpreadsheet\Settings;
1514
use PhpOffice\PhpSpreadsheet\Shared\File;
1615
use PhpOffice\PhpSpreadsheet\Spreadsheet;
1716
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
@@ -104,7 +103,7 @@ public function listWorksheetNames(string $filename): array
104103

105104
$xml = new XMLReader();
106105
$contents = $this->gzfileGetContents($filename);
107-
$xml->xml($contents, null, Settings::getLibXmlLoaderOptions());
106+
$xml->xml($contents);
108107
$xml->setParserProperty(2, true);
109108

110109
$worksheetNames = [];
@@ -133,7 +132,7 @@ public function listWorksheetInfo(string $filename): array
133132

134133
$xml = new XMLReader();
135134
$contents = $this->gzfileGetContents($filename);
136-
$xml->xml($contents, null, Settings::getLibXmlLoaderOptions());
135+
$xml->xml($contents);
137136
$xml->setParserProperty(2, true);
138137

139138
$worksheetInfo = [];
@@ -248,7 +247,7 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
248247

249248
/** @var XmlScanner */
250249
$securityScanner = $this->securityScanner;
251-
$xml2 = simplexml_load_string($securityScanner->scan($gFileData), 'SimpleXMLElement', Settings::getLibXmlLoaderOptions());
250+
$xml2 = simplexml_load_string($securityScanner->scan($gFileData));
252251
$xml = self::testSimpleXml($xml2);
253252

254253
$gnmXML = $xml->children(self::NAMESPACE_GNM);

src/PhpSpreadsheet/Reader/Html.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class Html extends BaseReader
3434

3535
private const STARTS_WITH_BOM = '/^(?:\xfe\xff|\xff\xfe|\xEF\xBB\xBF)/';
3636

37-
private const DECLARES_CHARSET = '/ charset=/i';
37+
private const DECLARES_CHARSET = '/\\bcharset=/i';
3838

3939
/**
4040
* Input encoding.

src/PhpSpreadsheet/Reader/Ods.php

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
use PhpOffice\PhpSpreadsheet\Reader\Ods\Properties as DocumentProperties;
1818
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
1919
use PhpOffice\PhpSpreadsheet\RichText\RichText;
20-
use PhpOffice\PhpSpreadsheet\Settings;
2120
use PhpOffice\PhpSpreadsheet\Shared\Date;
2221
use PhpOffice\PhpSpreadsheet\Shared\File;
2322
use PhpOffice\PhpSpreadsheet\Spreadsheet;
@@ -58,9 +57,12 @@ public function canRead(string $filename): bool
5857
$mimeType = $zip->getFromName($stat['name']);
5958
} elseif ($zip->statName('META-INF/manifest.xml')) {
6059
$xml = simplexml_load_string(
61-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('META-INF/manifest.xml')),
62-
'SimpleXMLElement',
63-
Settings::getLibXmlLoaderOptions()
60+
$this->getSecurityScannerOrThrow()
61+
->scan(
62+
$zip->getFromName(
63+
'META-INF/manifest.xml'
64+
)
65+
)
6466
);
6567
if ($xml !== false) {
6668
$namespacesContent = $xml->getNamespaces(true);
@@ -98,9 +100,10 @@ public function listWorksheetNames(string $filename): array
98100

99101
$xml = new XMLReader();
100102
$xml->xml(
101-
$this->getSecurityScannerOrThrow()->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE),
102-
null,
103-
Settings::getLibXmlLoaderOptions()
103+
$this->getSecurityScannerOrThrow()
104+
->scanFile(
105+
'zip://' . realpath($filename) . '#' . self::INITIAL_FILE
106+
)
104107
);
105108
$xml->setParserProperty(2, true);
106109

@@ -145,9 +148,10 @@ public function listWorksheetInfo(string $filename): array
145148

146149
$xml = new XMLReader();
147150
$xml->xml(
148-
$this->getSecurityScannerOrThrow()->scanFile('zip://' . realpath($filename) . '#' . self::INITIAL_FILE),
149-
null,
150-
Settings::getLibXmlLoaderOptions()
151+
$this->getSecurityScannerOrThrow()
152+
->scanFile(
153+
'zip://' . realpath($filename) . '#' . self::INITIAL_FILE
154+
)
151155
);
152156
$xml->setParserProperty(2, true);
153157

@@ -254,9 +258,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
254258
// Meta
255259

256260
$xml = @simplexml_load_string(
257-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('meta.xml')),
258-
'SimpleXMLElement',
259-
Settings::getLibXmlLoaderOptions()
261+
$this->getSecurityScannerOrThrow()
262+
->scan($zip->getFromName('meta.xml'))
260263
);
261264
if ($xml === false) {
262265
throw new Exception('Unable to read data from {$pFilename}');
@@ -270,8 +273,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
270273

271274
$dom = new DOMDocument('1.01', 'UTF-8');
272275
$dom->loadXML(
273-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('styles.xml')),
274-
Settings::getLibXmlLoaderOptions()
276+
$this->getSecurityScannerOrThrow()
277+
->scan($zip->getFromName('styles.xml'))
275278
);
276279

277280
$pageSettings = new PageSettings($dom);
@@ -280,8 +283,8 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
280283

281284
$dom = new DOMDocument('1.01', 'UTF-8');
282285
$dom->loadXML(
283-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName(self::INITIAL_FILE)),
284-
Settings::getLibXmlLoaderOptions()
286+
$this->getSecurityScannerOrThrow()
287+
->scan($zip->getFromName(self::INITIAL_FILE))
285288
);
286289

287290
$officeNs = (string) $dom->lookupNamespaceUri('office');
@@ -690,8 +693,8 @@ private function processSettings(ZipArchive $zip, Spreadsheet $spreadsheet): voi
690693
{
691694
$dom = new DOMDocument('1.01', 'UTF-8');
692695
$dom->loadXML(
693-
$this->getSecurityScannerOrThrow()->scan($zip->getFromName('settings.xml')),
694-
Settings::getLibXmlLoaderOptions()
696+
$this->getSecurityScannerOrThrow()
697+
->scan($zip->getFromName('settings.xml'))
695698
);
696699
//$xlinkNs = $dom->lookupNamespaceUri('xlink');
697700
$configNs = (string) $dom->lookupNamespaceUri('config');

src/PhpSpreadsheet/Reader/Xlsx.php

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
use PhpOffice\PhpSpreadsheet\Reader\Xlsx\WorkbookView;
2828
use PhpOffice\PhpSpreadsheet\ReferenceHelper;
2929
use PhpOffice\PhpSpreadsheet\RichText\RichText;
30-
use PhpOffice\PhpSpreadsheet\Settings;
3130
use PhpOffice\PhpSpreadsheet\Shared\Date;
3231
use PhpOffice\PhpSpreadsheet\Shared\Drawing;
3332
use PhpOffice\PhpSpreadsheet\Shared\File;
@@ -123,7 +122,7 @@ private function loadZip(string $filename, string $ns = '', bool $replaceUnclose
123122
$rels = @simplexml_load_string(
124123
$this->getSecurityScannerOrThrow()->scan($contents),
125124
'SimpleXMLElement',
126-
Settings::getLibXmlLoaderOptions(),
125+
0,
127126
$ns
128127
);
129128

@@ -138,7 +137,7 @@ private function loadZipNonamespace(string $filename, string $ns): SimpleXMLElem
138137
$rels = simplexml_load_string(
139138
$this->getSecurityScannerOrThrow()->scan($contents),
140139
'SimpleXMLElement',
141-
Settings::getLibXmlLoaderOptions(),
140+
0,
142141
($ns === '' ? $ns : '')
143142
);
144143

@@ -245,11 +244,13 @@ public function listWorksheetInfo(string $filename): array
245244

246245
$xml = new XMLReader();
247246
$xml->xml(
248-
$this->getSecurityScannerOrThrow()->scan(
249-
$this->getFromZipArchive($this->zip, $fileWorksheetPath)
250-
),
251-
null,
252-
Settings::getLibXmlLoaderOptions()
247+
$this->getSecurityScannerOrThrow()
248+
->scan(
249+
$this->getFromZipArchive(
250+
$this->zip,
251+
$fileWorksheetPath
252+
)
253+
)
253254
);
254255
$xml->setParserProperty(2, true);
255256

@@ -2001,9 +2002,8 @@ private function readRibbon(Spreadsheet $excel, string $customUITarget, ZipArchi
20012002
if ($dataRels) {
20022003
// exists and not empty if the ribbon have some pictures (other than internal MSO)
20032004
$UIRels = simplexml_load_string(
2004-
$this->getSecurityScannerOrThrow()->scan($dataRels),
2005-
'SimpleXMLElement',
2006-
Settings::getLibXmlLoaderOptions()
2005+
$this->getSecurityScannerOrThrow()
2006+
->scan($dataRels)
20072007
);
20082008
if (false !== $UIRels) {
20092009
// we need to save id and target to avoid parsing customUI.xml and "guess" if it's a pseudo callback who load the image

src/PhpSpreadsheet/Reader/Xlsx/Properties.php

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
use PhpOffice\PhpSpreadsheet\Document\Properties as DocumentProperties;
66
use PhpOffice\PhpSpreadsheet\Reader\Security\XmlScanner;
7-
use PhpOffice\PhpSpreadsheet\Settings;
87
use SimpleXMLElement;
98

109
class Properties
@@ -23,9 +22,7 @@ private function extractPropertyData(string $propertyData): ?SimpleXMLElement
2322
{
2423
// okay to omit namespace because everything will be processed by xpath
2524
$obj = simplexml_load_string(
26-
$this->securityScanner->scan($propertyData),
27-
'SimpleXMLElement',
28-
Settings::getLibXmlLoaderOptions()
25+
$this->securityScanner->scan($propertyData)
2926
);
3027

3128
return $obj === false ? null : $obj;

src/PhpSpreadsheet/Reader/Xml.php

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
use PhpOffice\PhpSpreadsheet\Reader\Xml\Properties;
1616
use PhpOffice\PhpSpreadsheet\Reader\Xml\Style;
1717
use PhpOffice\PhpSpreadsheet\RichText\RichText;
18-
use PhpOffice\PhpSpreadsheet\Settings;
1918
use PhpOffice\PhpSpreadsheet\Shared\Date;
2019
use PhpOffice\PhpSpreadsheet\Shared\File;
2120
use PhpOffice\PhpSpreadsheet\Spreadsheet;
@@ -132,9 +131,8 @@ private function trySimpleXMLLoadStringPrivate(string $filename, string $fileOrS
132131
}
133132
if ($continue) {
134133
$xml = @simplexml_load_string(
135-
$this->getSecurityScannerOrThrow()->scan($data),
136-
'SimpleXMLElement',
137-
Settings::getLibXmlLoaderOptions()
134+
$this->getSecurityScannerOrThrow()
135+
->scan($data)
138136
);
139137
}
140138
} catch (Throwable $e) {

src/PhpSpreadsheet/Settings.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ public static function htmlEntityFlags(): int
9494
* Set default options for libxml loader.
9595
*
9696
* @param ?int $options Default options for libxml loader
97+
*
98+
* @deprecated 3.5.0 no longer needed
9799
*/
98100
public static function setLibXmlLoaderOptions(?int $options): int
99101
{
@@ -110,14 +112,12 @@ public static function setLibXmlLoaderOptions(?int $options): int
110112
* Defaults to LIBXML_DTDLOAD | LIBXML_DTDATTR when not set explicitly.
111113
*
112114
* @return int Default options for libxml loader
115+
*
116+
* @deprecated 3.5.0 no longer needed
113117
*/
114118
public static function getLibXmlLoaderOptions(): int
115119
{
116-
if (self::$libXmlLoaderOptions === null) {
117-
return self::setLibXmlLoaderOptions(null);
118-
}
119-
120-
return self::$libXmlLoaderOptions;
120+
return self::$libXmlLoaderOptions ?? (defined('LIBXML_DTDLOAD') ? (LIBXML_DTDLOAD | LIBXML_DTDATTR) : 0);
121121
}
122122

123123
/**

tests/PhpSpreadsheetTests/Reader/Html/HtmlCharsetTest.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ public static function providerCharset(): array
4040
['charset.UTF-16.lebom.html', 'À1'],
4141
['charset.gb18030.html', '电视机'],
4242
['charset.unknown.html', 'exception'],
43+
['xhtml4.entity.xhtml', 'exception'],
4344
];
4445
}
4546
}

0 commit comments

Comments
 (0)