55namespace SimpleSAML \XML ;
66
77use DOMDocument ;
8- use RuntimeException ;
98use SimpleSAML \Assert \Assert ;
109use SimpleSAML \XML \Exception \IOException ;
10+ use SimpleSAML \XML \Exception \RuntimeException ;
1111use SimpleSAML \XML \Exception \UnparseableXMLException ;
1212
13- use function defined ;
1413use function file_get_contents ;
14+ use function func_num_args ;
1515use function libxml_clear_errors ;
1616use function libxml_get_last_error ;
17+ use function libxml_set_external_entity_loader ;
1718use function libxml_use_internal_errors ;
1819use function sprintf ;
1920
2223 */
2324final class DOMDocumentFactory
2425{
 26+ /**
 * Default libxml option bitmask: used as the default value of the $options
 * parameter of fromString() and fromFile() when the caller does not pass one.
 *
 27+ * @var non-negative-int
 28+ * TODO: Add LIBXML_NO_XXE to the defaults when PHP 8.4.0 + libxml 2.13.0 become generally available
 29+ */
 30+ public const DEFAULT_OPTIONS = LIBXML_COMPACT | LIBXML_NONET | LIBXML_NSCLEAN ;
31+
32+
2533 /**
2634 * @param string $xml
27- * @param non-empty-string $xml
35+ * @param non-negative-int $options
2836 *
2937 * @return \DOMDocument
3038 */
31- public static function fromString (string $ xml ): DOMDocument
32- {
39+ public static function fromString (
40+ string $ xml ,
41+ int $ options = self ::DEFAULT_OPTIONS ,
42+ ): DOMDocument {
43+ libxml_set_external_entity_loader (null );
3344 Assert::notWhitespaceOnly ($ xml );
45+ Assert::notRegex (
46+ $ xml ,
47+ '/<(\s*)!(\s*)DOCTYPE/ ' ,
48+ 'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body ' ,
49+ RuntimeException::class,
50+ );
3451
3552 $ internalErrors = libxml_use_internal_errors (true );
3653 libxml_clear_errors ();
3754
38- $ domDocument = self ::create ();
39- $ options = LIBXML_DTDLOAD | LIBXML_DTDATTR | LIBXML_NONET | LIBXML_PARSEHUGE | LIBXML_NSCLEAN ;
40- if (defined ('LIBXML_COMPACT ' )) {
41- $ options |= LIBXML_COMPACT ;
55+ // If LIBXML_NO_XXE is available and option not set
56+ if (func_num_args () === 1 && defined ('LIBXML_NO_XXE ' )) {
57+ $ options != LIBXML_NO_XXE ;
4258 }
4359
60+ $ domDocument = self ::create ();
4461 $ loaded = $ domDocument ->loadXML ($ xml , $ options );
4562
4663 libxml_use_internal_errors ($ internalErrors );
@@ -55,11 +72,11 @@ public static function fromString(string $xml): DOMDocument
5572 libxml_clear_errors ();
5673
5774 foreach ($ domDocument ->childNodes as $ child ) {
58- if ( $ child -> nodeType === XML_DOCUMENT_TYPE_NODE ) {
59- throw new RuntimeException (
60- 'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body ' ,
61- );
62- }
75+ Assert:: false (
76+ $ child -> nodeType === XML_DOCUMENT_TYPE_NODE ,
77+ 'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body ' ,
78+ RuntimeException::class,
79+ );
6380 }
6481
6582 return $ domDocument ;
@@ -68,10 +85,11 @@ public static function fromString(string $xml): DOMDocument
6885
6986 /**
7087 * @param string $file
88+ * @param non-negative-int $options
7189 *
7290 * @return \DOMDocument
7391 */
74- public static function fromFile (string $ file ): DOMDocument
92+ public static function fromFile (string $ file, int $ options = self :: DEFAULT_OPTIONS ): DOMDocument
7593 {
7694 error_clear_last ();
7795 $ xml = @file_get_contents ($ file );
@@ -83,7 +101,7 @@ public static function fromFile(string $file): DOMDocument
83101 }
84102
85103 Assert::notWhitespaceOnly ($ xml , sprintf ('File "%s" does not have content ' , $ file ), RuntimeException::class);
86- return static ::fromString ($ xml );
104+ return ( func_num_args () === 1 ) ? static ::fromString ($ xml) : static :: fromString ( $ xml , $ options );
87105 }
88106
89107
0 commit comments