11<?php
2+
23namespace Masterminds ;
34
4- use Masterminds \HTML5 \Parser \FileInputStream ;
5- use Masterminds \HTML5 \Parser \InputStream ;
6- use Masterminds \HTML5 \Parser \StringInputStream ;
75use Masterminds \HTML5 \Parser \DOMTreeBuilder ;
86use Masterminds \HTML5 \Parser \Scanner ;
97use Masterminds \HTML5 \Parser \Tokenizer ;
1210
1311/**
1412 * This class offers convenience methods for parsing and serializing HTML5.
15- * It is roughly designed to mirror the \DOMDocument class that is
16- * provided with most versions of PHP.
13+ * It is roughly designed to mirror the \DOMDocument native class.
1714 */
1815class HTML5
1916{
20-
2117 /**
2218 * Global options for the parser and serializer.
2319 *
2420 * @var array
2521 */
26- protected $ options = array (
27- // If the serializer should encode all entities.
28- 'encode_entities ' => false
22+ private $ defaultOptions = array (
23+ // Whether the serializer should aggressively encode all characters as entities.
24+ 'encode_entities ' => false ,
25+
26+ // Prevents the parser from automatically assigning the HTML5 namespace to the DOM document.
27+ 'disable_html_ns ' => false ,
2928 );
3029
3130 protected $ errors = array ();
3231
/**
 * Set up the instance's default options.
 *
 * The supplied values are layered over the defaults already declared on the
 * class; for matching string keys the supplied value wins.
 *
 * @param array $defaultOptions Option overrides to merge into the defaults
 */
public function __construct(array $defaultOptions = array())
{
    $merged = array_merge($this->defaultOptions, $defaultOptions);
    $this->defaultOptions = $merged;
}
3736
/**
 * Retrieve the default options currently held by this instance.
 *
 * @return array The option array stored on the instance
 */
public function getOptions()
{
    $current = $this->defaultOptions;

    return $current;
}
4746
4847 /**
@@ -55,14 +54,13 @@ public function getOptions()
5554 *
5655 * The rules governing parsing are set out in the HTML 5 spec.
5756 *
58- * @param string|resource $file
59- * The path to the file to parse. If this is a resource, it is
60- * assumed to be an open stream whose pointer is set to the first
61- * byte of input.
62- * @param array $options
63- * Configuration options when parsing the HTML
57+ * @param string|resource $file The path to the file to parse. If this is a resource, it is
58+ * assumed to be an open stream whose pointer is set to the first
59+ * byte of input.
60+ * @param array $options Configuration options when parsing the HTML
61+ *
6462 * @return \DOMDocument A DOM document. This object type is defined by the libxml
65- * library, and should have been included with your version of PHP.
63+ * library, and should have been included with your version of PHP.
6664 */
6765 public function load ($ file , array $ options = array ())
6866 {
@@ -80,12 +78,11 @@ public function load($file, array $options = array())
8078 * Take a string of HTML 5 (or earlier) and parse it into a
8179 * DOMDocument.
8280 *
83- * @param string $string
84- * A html5 document as a string.
85- * @param array $options
86- * Configuration options when parsing the HTML
81+ * @param string $string A html5 document as a string
82+ * @param array $options Configuration options when parsing the HTML
83+ *
8784 * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
88- * almost all distribtions of PHP.
85+ * almost all distributions of PHP.
8986 */
9087 public function loadHTML ($ string , array $ options = array ())
9188 {
@@ -98,15 +95,13 @@ public function loadHTML($string, array $options = array())
9895 * This is here to provide backwards compatibility with the
9996 * PHP DOM implementation. It simply calls load().
10097 *
101- * @param string $file
102- * The path to the file to parse. If this is a resource, it is
103- * assumed to be an open stream whose pointer is set to the first
104- * byte of input.
105- * @param array $options
106- * Configuration options when parsing the HTML
98+ * @param string $file The path to the file to parse. If this is a resource, it is
99+ * assumed to be an open stream whose pointer is set to the first
100+ * byte of input.
101+ * @param array $options Configuration options when parsing the HTML
107102 *
108103 * @return \DOMDocument A DOM document. This object type is defined by the libxml
109- * library, and should have been included with your version of PHP.
104+ * library, and should have been included with your version of PHP.
110105 */
111106 public function loadHTMLFile ($ file , array $ options = array ())
112107 {
@@ -116,19 +111,19 @@ public function loadHTMLFile($file, array $options = array())
/**
 * Build a DOM fragment from a string of HTML.
 *
 * This is a thin wrapper that hands both arguments straight to parseFragment().
 *
 * @param string $string  The HTML5 fragment as a string
 * @param array  $options Configuration options when parsing the HTML
 *
 * @return \DOMDocumentFragment The fragment produced by parseFragment()
 */
public function loadHTMLFragment($string, array $options = array())
{
    $fragment = $this->parseFragment($string, $options);

    return $fragment;
}
129124
130125 /**
131- * Return all errors encountered into parsing phase
126+ * Return all errors encountered during the parsing phase.
132127 *
133128 * @return array
134129 */
@@ -138,7 +133,7 @@ public function getErrors()
138133 }
139134
140135 /**
141- * Return true it some errors were encountered into parsing phase
136+ * Return true if some errors were encountered during the parsing phase.
142137 *
143138 * @return bool
144139 */
@@ -148,23 +143,20 @@ public function hasErrors()
148143 }
149144
150145 /**
151- * Parse an input stream.
152- *
153- * Lower-level loading function. This requires an input stream instead
154- * of a string, file, or resource.
146+ * Parse an input string.
155147 *
156148 * @param string $input
157- * @param array $options
149+ * @param array $options
158150 *
159151 * @return \DOMDocument
160152 */
161153 public function parse ($ input , array $ options = array ())
162154 {
163155 $ this ->errors = array ();
164- $ options = array_merge ($ this ->getOptions () , $ options );
156+ $ options = array_merge ($ this ->defaultOptions , $ options );
165157 $ events = new DOMTreeBuilder (false , $ options );
166158 $ scanner = new Scanner ($ input );
167- $ parser = new Tokenizer ($ scanner , $ events , !empty ($ options ['xmlNamespaces ' ]) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML );
159+ $ parser = new Tokenizer ($ scanner , $ events , !empty ($ options ['xmlNamespaces ' ]) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML );
168160
169161 $ parser ->parse ();
170162 $ this ->errors = $ events ->getErrors ();
@@ -178,17 +170,17 @@ public function parse($input, array $options = array())
178170 * Lower-level loading function. This requires an input stream instead
179171 * of a string, file, or resource.
180172 *
181- * @param string $input The input data to parse in the form of a string.
182- * @param array $options An array of options
173+ * @param string $input the input data to parse in the form of a string
174+ * @param array $options An array of options
183175 *
184176 * @return \DOMDocumentFragment
185177 */
186178 public function parseFragment ($ input , array $ options = array ())
187179 {
188- $ options = array_merge ($ this ->getOptions () , $ options );
180+ $ options = array_merge ($ this ->defaultOptions , $ options );
189181 $ events = new DOMTreeBuilder (true , $ options );
190182 $ scanner = new Scanner ($ input );
191- $ parser = new Tokenizer ($ scanner , $ events , !empty ($ options ['xmlNamespaces ' ]) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML );
183+ $ parser = new Tokenizer ($ scanner , $ events , !empty ($ options ['xmlNamespaces ' ]) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML );
192184
193185 $ parser ->parse ();
194186 $ this ->errors = $ events ->getErrors ();
@@ -199,15 +191,12 @@ public function parseFragment($input, array $options = array())
199191 /**
200192 * Save a DOM into a given file as HTML5.
201193 *
202- * @param mixed $dom
203- * The DOM to be serialized.
204- * @param string|resource $file
205- * The filename to be written or resource to write to.
206- * @param array $options
207- * Configuration options when serializing the DOM. These include:
208- * - encode_entities: Text written to the output is escaped by default and not all
209- * entities are encoded. If this is set to true all entities will be encoded.
210- * Defaults to false.
194+ * @param mixed $dom The DOM to be serialized
195+ * @param string|resource $file The filename to be written or resource to write to
196+ * @param array $options Configuration options when serializing the DOM. These include:
197+ * - encode_entities: Text written to the output is escaped by default and not all
198+ * entities are encoded. If this is set to true all entities will be encoded.
199+ * Defaults to false.
211200 */
212201 public function save ($ dom , $ file , $ options = array ())
213202 {
@@ -216,9 +205,9 @@ public function save($dom, $file, $options = array())
216205 $ stream = $ file ;
217206 $ close = false ;
218207 } else {
219- $ stream = fopen ($ file , 'w ' );
208+ $ stream = fopen ($ file , 'wb ' );
220209 }
221- $ options = array_merge ($ this ->getOptions () , $ options );
210+ $ options = array_merge ($ this ->defaultOptions , $ options );
222211 $ rules = new OutputRules ($ stream , $ options );
223212 $ trav = new Traverser ($ dom , $ stream , $ rules , $ options );
224213
@@ -232,21 +221,19 @@ public function save($dom, $file, $options = array())
/**
 * Convert a DOM into an HTML5 string.
 *
 * The DOM is serialized through save() into a php://temp stream; the stream is
 * then read back from its beginning and closed before the string is returned.
 *
 * @param mixed $dom     The DOM to be serialized
 * @param array $options Configuration options when serializing the DOM. These include:
 *                       - encode_entities: Text written to the output is escaped by default and not all
 *                       entities are encoded. If this is set to true all entities will be encoded.
 *                       Defaults to false.
 *
 * @return string An HTML5 document generated from the DOM
 */
public function saveHTML($dom, $options = array())
{
    $stream = fopen('php://temp', 'wb');
    $this->save($dom, $stream, array_merge($this->defaultOptions, $options));

    // Offset 0 seeks back to the start of the stream so everything written is read.
    $html = stream_get_contents($stream, -1, 0);

    // save() flags a resource it is handed as not-to-close ($close = false),
    // so the temporary stream has to be released here.
    fclose($stream);

    return $html;
}
252239}
0 commit comments