11#[ macro_use]
22extern crate rustler;
3- #[ macro_use]
4- extern crate lazy_static;
53extern crate html5ever;
64extern crate xml5ever;
75#[ macro_use]
86extern crate markup5ever;
9- extern crate scoped_pool;
107extern crate tendril;
118
129use std:: panic;
@@ -24,7 +21,7 @@ use rustler::{
2421 Term ,
2522} ;
2623
27- use rustler:: env :: OwnedEnv ;
24+ use rustler:: schedule :: SchedulerFlags ;
2825use rustler:: types:: binary:: Binary ;
2926
3027use tendril:: TendrilSink ;
@@ -129,83 +126,53 @@ enum ParserType {
129126 XmlDocument ,
130127}
131128
132- // Thread pool for `parse_async`.
133- // TODO: How do we decide on pool size?
134- lazy_static ! {
135- static ref POOL : scoped_pool:: Pool = scoped_pool:: Pool :: new( 4 ) ;
136- }
137-
138129fn parse < ' a > ( parser_type : ParserType , env : Env < ' a > , args : & [ Term < ' a > ] ) -> NifResult < Term < ' a > > {
139- let mut owned_env = OwnedEnv :: new ( ) ;
140-
141- // Copies the term into the inner env. Since this term is normally a large
142- // binary term, copying it over should be cheap, since the binary will be
143- // refcounted within the BEAM.
144- let input_term = owned_env. save ( args[ 0 ] ) ;
145-
146- let return_pid = env. pid ( ) ;
147-
148- //let config = term_to_configs(args[1]);
149-
150- POOL . spawn ( move || {
151- owned_env. send_and_clear ( & return_pid, |inner_env| {
152- // This should not really be done in user code. We (Rustler project)
153- // need to find a better abstraction that eliminates this.
154- match panic:: catch_unwind ( || {
155- let binary: Binary = match input_term. load ( inner_env) . decode ( ) {
156- Ok ( inner) => inner,
157- Err ( _) => panic ! ( "argument is not a binary" ) ,
158- } ;
159-
160- let sink = FlatDom :: default ( ) ;
161-
162- let result = match parser_type {
163- ParserType :: HtmlDocument => {
164- // TODO: Use Parser.from_bytes instead?
165- let parser = html5ever:: parse_document ( sink, Default :: default ( ) ) ;
166-
167- match std:: str:: from_utf8 ( binary. as_slice ( ) ) {
168- Ok ( decoded) => parser. one ( decoded) ,
169- Err ( _) => panic ! ( "input is not valid utf8" ) ,
170- }
171- }
172-
173- ParserType :: XmlDocument => {
174- // TODO: Use Parser.from_bytes instead?
175- let parser = xml5ever:: driver:: parse_document ( sink, Default :: default ( ) ) ;
176-
177- match std:: str:: from_utf8 ( binary. as_slice ( ) ) {
178- Ok ( decoded) => parser. one ( decoded) ,
179- Err ( _) => panic ! ( "input is not valid utf8" ) ,
180- }
181-
182- }
183- } ;
184-
185- let result_term = result. encode ( inner_env) ;
186-
187- //let result_term = handle_to_term(inner_env, &index, &Parent::None, &result.document);
188-
189- ( atoms:: html5ever_nif_result ( ) , atoms:: ok ( ) , result_term) . encode ( inner_env)
190- } ) {
191- Ok ( term) => term,
192- Err ( err) => {
193- // Try to extract a panic reason and return that. If this
194- // fails, fail generically.
195- let reason = if let Some ( s) = err. downcast_ref :: < String > ( ) {
196- s. encode ( inner_env)
197- } else if let Some ( & s) = err. downcast_ref :: < & ' static str > ( ) {
198- s. encode ( inner_env)
199- } else {
200- atoms:: nif_panic ( ) . encode ( inner_env)
201- } ;
202- ( atoms:: html5ever_nif_result ( ) , atoms:: error ( ) , reason) . encode ( inner_env)
130+ match panic:: catch_unwind ( || {
131+ let binary: Binary = match args[ 0 ] . decode ( ) {
132+ Ok ( inner) => inner,
133+ Err ( _) => panic ! ( "argument is not a binary" ) ,
134+ } ;
135+
136+ let sink = FlatDom :: default ( ) ;
137+
138+ let result = match parser_type {
139+ ParserType :: HtmlDocument => {
140+ let parser = html5ever:: parse_document ( sink, Default :: default ( ) ) ;
141+
142+ match std:: str:: from_utf8 ( binary. as_slice ( ) ) {
143+ Ok ( decoded) => parser. one ( decoded) ,
144+ Err ( _) => panic ! ( "input is not valid utf8" ) ,
203145 }
204146 }
205- } ) ;
206- } ) ;
207147
208- Ok ( atoms:: ok ( ) . encode ( env) )
148+ ParserType :: XmlDocument => {
149+ let parser = xml5ever:: driver:: parse_document ( sink, Default :: default ( ) ) ;
150+
151+ match std:: str:: from_utf8 ( binary. as_slice ( ) ) {
152+ Ok ( decoded) => parser. one ( decoded) ,
153+ Err ( _) => panic ! ( "input is not valid utf8" ) ,
154+ }
155+ }
156+ } ;
157+
158+ let result_term = result. encode ( env) ;
159+
160+ ( atoms:: html5ever_nif_result ( ) , atoms:: ok ( ) , result_term) . encode ( env)
161+ } ) {
162+ Ok ( term) => Ok ( term) ,
163+ Err ( err) => {
164+ // Try to extract a panic reason and return that. If this
165+ // fails, fail generically.
166+ let reason = if let Some ( s) = err. downcast_ref :: < String > ( ) {
167+ s. encode ( env)
168+ } else if let Some ( & s) = err. downcast_ref :: < & ' static str > ( ) {
169+ s. encode ( env)
170+ } else {
171+ atoms:: nif_panic ( ) . encode ( env)
172+ } ;
173+ Ok ( ( atoms:: html5ever_nif_result ( ) , atoms:: error ( ) , reason) . encode ( env) )
174+ }
175+ }
209176}
210177
211178fn parse_html < ' a > ( env : Env < ' a > , args : & [ Term < ' a > ] ) -> NifResult < Term < ' a > > {
@@ -218,7 +185,10 @@ fn parse_xml<'a>(env: Env<'a>, args: &[Term<'a>]) -> NifResult<Term<'a>> {
218185
219186rustler_export_nifs ! (
220187 "Elixir.MeeseeksHtml5ever.Native" ,
221- [ ( "parse_html" , 1 , parse_html) , ( "parse_xml" , 1 , parse_xml) , ] ,
188+ [
189+ ( "parse_html" , 1 , parse_html, SchedulerFlags :: DirtyCpu ) ,
190+ ( "parse_xml" , 1 , parse_xml, SchedulerFlags :: DirtyCpu ) ,
191+ ] ,
222192 Some ( on_load)
223193) ;
224194
0 commit comments