@@ -3,11 +3,15 @@ use std::convert::Infallible;
33
44use crate :: emitters:: callback:: { Callback , CallbackEmitter , CallbackEvent } ;
55use crate :: utils:: trace_log;
6- use crate :: { Emitter , Error , State } ;
6+ use crate :: { Emitter , Error , Readable , Reader , State , Tokenizer } ;
77
8+ use html5ever:: interface:: { create_element, TreeSink } ;
9+ use html5ever:: tokenizer:: states:: State as Html5everState ;
810use html5ever:: tokenizer:: {
911 states:: RawKind , Doctype , Tag , TagKind , Token as Html5everToken , TokenSink , TokenSinkResult ,
1012} ;
13+ use html5ever:: tree_builder:: TreeBuilder ;
14+ use html5ever:: ParseOpts ;
1115use html5ever:: { Attribute , QualName } ;
1216
1317const BOGUS_LINENO : u64 = 1 ;
@@ -278,3 +282,111 @@ impl<'a, S: TokenSink> Emitter for Html5everEmitter<'a, S> {
278282 . adjusted_current_node_present_but_not_in_html_namespace ( )
279283 }
280284}
285+
286+ fn map_tokenizer_state ( input : Html5everState ) -> State {
287+ match input {
288+ Html5everState :: Data => State :: Data ,
289+ Html5everState :: Plaintext => State :: PlainText ,
290+ Html5everState :: RawData ( RawKind :: Rcdata ) => State :: RcData ,
291+ Html5everState :: RawData ( RawKind :: Rawtext ) => State :: RawText ,
292+ Html5everState :: RawData ( RawKind :: ScriptData ) => State :: ScriptData ,
293+ x => todo ! ( "{:?}" , x) ,
294+ }
295+ }
296+
297+ /// Parse an HTML fragment
298+ ///
299+ /// This is a convenience function for using [Html5everEmitter] together with html5ever. It is
300+ /// equivalent to the same functions in [html5ever::driver].
301+ ///
302+ /// ```
303+ /// use html5ever::{local_name, QualName, ns, namespace_url}; // extern crate html5ever;
304+ /// use scraper::Html; // extern crate scraper;
305+ ///
306+ /// let input = "<h1>hello world</h1>";
307+ ///
308+ /// // equivalent to `Html::parse_fragment`
309+ /// let dom = Html::new_fragment();
310+ /// let Ok(dom) = html5gum::emitters::html5ever::parse_fragment(
311+ /// input,
312+ /// dom,
313+ /// Default::default(),
314+ /// QualName::new(None, ns!(html), local_name!("body")),
315+ /// Vec::new()
316+ /// );
317+ /// ```
318+ pub fn parse_fragment < ' a , R , Sink > (
319+ input : R ,
320+ mut sink : Sink ,
321+ opts : ParseOpts ,
322+ context_name : QualName ,
323+ context_attrs : Vec < Attribute > ,
324+ ) -> Result < Sink , <R :: Reader as Reader >:: Error >
325+ where
326+ R : Readable < ' a > ,
327+ Sink : TreeSink ,
328+ {
329+ let context_elem = create_element ( & mut sink, context_name, context_attrs) ;
330+ parse_fragment_for_element ( input, sink, opts, context_elem, None )
331+ }
332+
333+ /// Like `parse_fragment`, but with an existing context element
334+ /// and optionally a form element.
335+ ///
336+ /// This is a convenience function for using [Html5everEmitter] together with html5ever. It is
337+ /// equivalent to the same functions in [html5ever::driver].
338+ pub fn parse_fragment_for_element < ' a , R , Sink > (
339+ input : R ,
340+ sink : Sink ,
341+ opts : ParseOpts ,
342+ context_element : Sink :: Handle ,
343+ form_element : Option < Sink :: Handle > ,
344+ ) -> Result < Sink , <R :: Reader as Reader >:: Error >
345+ where
346+ R : Readable < ' a > ,
347+ Sink : TreeSink ,
348+ {
349+ let mut tree_builder =
350+ TreeBuilder :: new_for_fragment ( sink, context_element, form_element, opts. tree_builder ) ;
351+
352+ let initial_state = map_tokenizer_state ( tree_builder. tokenizer_state_for_context_elem ( ) ) ;
353+ let token_emitter = Html5everEmitter :: new ( & mut tree_builder) ;
354+ let mut tokenizer = Tokenizer :: new_with_emitter ( input, token_emitter) ;
355+ tokenizer. set_state ( initial_state) ;
356+ tokenizer. finish ( ) ?;
357+ Ok ( tree_builder. sink )
358+ }
359+
360+ /// Parse an HTML document.
361+ ///
362+ /// This is a convenience function for using [Html5everEmitter] together with html5ever. It is
363+ /// equivalent to the same functions in [html5ever::driver].
364+ ///
365+ /// ```rust
366+ /// use scraper::Html; // extern crate scraper;
367+ ///
368+ /// let input = "<h1>hello world</h1>";
369+ ///
370+ /// // equivalent to `Html::parse_document`
371+ /// let dom = Html::new_document();
372+ /// let Ok(dom) = html5gum::emitters::html5ever::parse_document(
373+ /// input,
374+ /// dom,
375+ /// Default::default()
376+ /// );
377+ /// ```
378+ pub fn parse_document < ' a , R , Sink > (
379+ input : R ,
380+ sink : Sink ,
381+ opts : ParseOpts ,
382+ ) -> Result < Sink , <R :: Reader as Reader >:: Error >
383+ where
384+ R : Readable < ' a > ,
385+ Sink : TreeSink ,
386+ {
387+ let mut tree_builder = TreeBuilder :: new ( sink, opts. tree_builder ) ;
388+ let token_emitter = Html5everEmitter :: new ( & mut tree_builder) ;
389+ let tokenizer = Tokenizer :: new_with_emitter ( input, token_emitter) ;
390+ tokenizer. finish ( ) ?;
391+ Ok ( tree_builder. sink )
392+ }
0 commit comments