1616// You should have received a copy of the GNU Affero General Public License
1717// along with this program. If not, see <https://www.gnu.org/licenses/>.
1818
19- mod types;
2019mod sink;
20+ mod types;
2121
// Installs jemalloc as the process-wide allocator. Both attributes below apply
// to the static, so this only takes effect in builds compiled with debug
// assertions enabled; other builds keep the default allocator.
2222#[ cfg( debug_assertions) ]
2323#[ global_allocator]
2424static GLOBAL : tikv_jemallocator:: Jemalloc = tikv_jemallocator:: Jemalloc ;
2525
26- use types:: * ;
2726use std:: cell:: Cell ;
2827use std:: os:: raw:: { c_uchar, c_void} ;
28+ use types:: * ;
2929
30- use html5ever:: { parse_document, parse_fragment, QualName , LocalName , ns, ParseOpts , Parser } ;
31- use html5ever:: tendril:: { TendrilSink , StrTendril } ;
3230use html5ever:: interface:: tree_builder:: QuirksMode ;
31+ use html5ever:: tendril:: { StrTendril , TendrilSink } ;
32+ use html5ever:: { ns, parse_document, parse_fragment, LocalName , ParseOpts , Parser , QualName } ;
3333
3434#[ no_mangle]
3535pub extern "C" fn html5ever_parse_document (
@@ -135,13 +135,14 @@ pub extern "C" fn html5ever_parse_fragment(
135135
136136 let bytes = unsafe { std:: slice:: from_raw_parts ( html, len) } ;
137137 parse_fragment (
138- sink, Default :: default ( ) ,
138+ sink,
139+ Default :: default ( ) ,
139140 QualName :: new ( None , ns ! ( html) , LocalName :: from ( "body" ) ) ,
140- vec ! [ ] , // attributes
141- false , // context_element_allows_scripting
141+ vec ! [ ] , // attributes
142+ false , // context_element_allows_scripting
142143 )
143- . from_utf8 ( )
144- . one ( bytes) ;
144+ . from_utf8 ( )
145+ . one ( bytes) ;
145146}
146147
147148#[ no_mangle]
@@ -182,15 +183,15 @@ pub struct Memory {
/// Reports allocator statistics to the C caller as a `Memory` value.
///
/// Only compiled when debug assertions are enabled. The function first
/// advances the jemalloc statistics epoch, then reads the `resident` and
/// `allocated` counters into the matching `Memory` fields.
///
/// # Panics
///
/// Panics (via `unwrap`) if advancing the epoch or reading either statistic
/// fails.
// NOTE(review): the counters are presumably byte counts as reported by
// jemalloc; confirm against the tikv-jemalloc-ctl documentation.
182183#[ cfg( debug_assertions) ]
183184#[ no_mangle]
184185pub extern "C" fn html5ever_get_memory_usage ( ) -> Memory {
185- use tikv_jemalloc_ctl:: { stats , epoch } ;
186+ use tikv_jemalloc_ctl:: { epoch , stats } ;
186187
187188 // many statistics are cached and only updated when the epoch is advanced.
188189 epoch:: advance ( ) . unwrap ( ) ;
189190
190- return Memory {
191+ return Memory {
191192 resident : stats:: resident:: read ( ) . unwrap ( ) ,
192193 allocated : stats:: allocated:: read ( ) . unwrap ( ) ,
193- }
194+ } ;
194195}
195196
196197// Streaming parser API
@@ -225,9 +226,8 @@ pub extern "C" fn html5ever_streaming_parser_create(
225226 // SAFETY: We're creating a self-referential structure here.
226227 // The arena is stored in the StreamingParser and lives as long as the parser.
227228 // The sink contains a reference to the arena that's valid for the parser's lifetime.
228- let arena_ref: & ' static typed_arena:: Arena < sink:: ElementData > = unsafe {
229- std:: mem:: transmute ( arena. as_ref ( ) )
230- } ;
229+ let arena_ref: & ' static typed_arena:: Arena < sink:: ElementData > =
230+ unsafe { std:: mem:: transmute ( arena. as_ref ( ) ) } ;
231231
232232 let sink = sink:: Sink {
233233 ctx : ctx,
@@ -281,7 +281,8 @@ pub extern "C" fn html5ever_streaming_parser_feed(
281281
282282 // Feed the chunk to the parser
283283 // The Parser implements TendrilSink, so we can call process() on it
284- let parser = streaming_parser. parser
284+ let parser = streaming_parser
285+ . parser
285286 . downcast_mut :: < Parser < sink:: Sink > > ( )
286287 . expect ( "Invalid parser type" ) ;
287288
@@ -304,7 +305,8 @@ pub extern "C" fn html5ever_streaming_parser_finish(parser_ptr: *mut c_void) {
304305 let streaming_parser = unsafe { Box :: from_raw ( parser_ptr as * mut StreamingParser ) } ;
305306
306307 // Extract and finish the parser
307- let parser = streaming_parser. parser
308+ let parser = streaming_parser
309+ . parser
308310 . downcast :: < Parser < sink:: Sink > > ( )
309311 . expect ( "Invalid parser type" ) ;
310312
@@ -326,3 +328,57 @@ pub extern "C" fn html5ever_streaming_parser_destroy(parser_ptr: *mut c_void) {
326328 let _ = Box :: from_raw ( parser_ptr as * mut StreamingParser ) ;
327329 }
328330}
331+
332+ #[ no_mangle]
333+ pub extern "C" fn xml5ever_parse_document (
334+ xml : * mut c_uchar ,
335+ len : usize ,
336+ document : Ref ,
337+ ctx : Ref ,
338+ create_element_callback : CreateElementCallback ,
339+ get_data_callback : GetDataCallback ,
340+ append_callback : AppendCallback ,
341+ parse_error_callback : ParseErrorCallback ,
342+ pop_callback : PopCallback ,
343+ create_comment_callback : CreateCommentCallback ,
344+ create_processing_instruction : CreateProcessingInstruction ,
345+ append_doctype_to_document : AppendDoctypeToDocumentCallback ,
346+ add_attrs_if_missing_callback : AddAttrsIfMissingCallback ,
347+ get_template_contents_callback : GetTemplateContentsCallback ,
348+ remove_from_parent_callback : RemoveFromParentCallback ,
349+ reparent_children_callback : ReparentChildrenCallback ,
350+ append_before_sibling_callback : AppendBeforeSiblingCallback ,
351+ append_based_on_parent_node_callback : AppendBasedOnParentNodeCallback ,
352+ ) -> ( ) {
353+ if xml. is_null ( ) || len == 0 {
354+ return ( ) ;
355+ }
356+
357+ let arena = typed_arena:: Arena :: new ( ) ;
358+
359+ let sink = sink:: Sink {
360+ ctx : ctx,
361+ arena : & arena,
362+ document : document,
363+ quirks_mode : Cell :: new ( QuirksMode :: NoQuirks ) ,
364+ pop_callback : pop_callback,
365+ append_callback : append_callback,
366+ get_data_callback : get_data_callback,
367+ parse_error_callback : parse_error_callback,
368+ create_element_callback : create_element_callback,
369+ create_comment_callback : create_comment_callback,
370+ create_processing_instruction : create_processing_instruction,
371+ append_doctype_to_document : append_doctype_to_document,
372+ add_attrs_if_missing_callback : add_attrs_if_missing_callback,
373+ get_template_contents_callback : get_template_contents_callback,
374+ remove_from_parent_callback : remove_from_parent_callback,
375+ reparent_children_callback : reparent_children_callback,
376+ append_before_sibling_callback : append_before_sibling_callback,
377+ append_based_on_parent_node_callback : append_based_on_parent_node_callback,
378+ } ;
379+
380+ let bytes = unsafe { std:: slice:: from_raw_parts ( xml, len) } ;
381+ xml5ever:: driver:: parse_document ( sink, xml5ever:: driver:: XmlParseOpts :: default ( ) )
382+ . from_utf8 ( )
383+ . one ( bytes) ;
384+ }
0 commit comments