11use itertools:: Itertools ;
2- use lazy_static:: lazy_static;
32use log:: { error, info} ;
4- use regex :: Regex ;
3+ use reqwest :: Client ;
54use scraper:: selectable:: Selectable ;
65use scraper:: Html ;
76use std:: collections:: HashMap ;
use strum::IntoEnumIterator;
1110use super :: models:: { DevilFruit , DfTypeInfo } ;
1211use crate :: df:: parser:: { get_parser, Utils } ;
1312use crate :: df:: types:: DfType ;
14- use crate :: fetcher:: HtmlFetcher ;
13+ use crate :: fetcher:: { FetchHtml , HtmlFetcher } ;
1514use crate :: types:: { Error , UrlTyped } ;
1615
/// Scraping operations over the Devil Fruit wiki pages.
///
/// Implementors fetch and parse wiki HTML; both methods return
/// [`Error`] on fetch or structure failures.
pub trait DfScrapable {
    /// Returns per-type aggregate info (canon/non-canon counts, description).
    async fn get_dftype_info(&self) -> Result<Vec<DfTypeInfo>, Error>;
    /// Returns the full list of Devil Fruits collected across the type pages.
    async fn get_df_list(&self) -> Result<Vec<DevilFruit>, Error>;
}
2120
22- lazy_static ! {
23- static ref REX_EN_NAME : Regex = Regex :: new( r"English version: (.+)" ) . unwrap( ) ;
24- }
25-
/// Wiki scraper for Devil Fruit data.
///
/// Generic over the HTTP client `T` (defaults to [`Client`]) so tests can
/// substitute a mock implementing [`FetchHtml`].
#[derive(Debug)]
pub struct DfScraper<T = Client>
where
    T: FetchHtml + Clone,
{
    // Fetcher used for all page requests; cloned into spawned tasks.
    fetcher: HtmlFetcher<T>,
    // Wiki root, prepended to relative paths such as "/wiki/Devil_Fruit".
    base_url: String,
}
3129
32- impl DfScraper {
33- pub fn new ( fetcher : HtmlFetcher , base_url : & str ) -> Self {
30+ impl < T : FetchHtml + Clone > DfScraper < T > {
31+ pub fn new ( fetcher : HtmlFetcher < T > , base_url : & str ) -> Self {
3432 Self {
3533 fetcher,
3634 base_url : base_url. to_string ( ) ,
3735 }
3836 }
3937}
4038
41- impl DfScrapable for DfScraper {
39+ impl < T : FetchHtml + Clone + std:: marker:: Send + std:: marker:: Sync + ' static > DfScrapable
40+ for DfScraper < T >
41+ {
4242 async fn get_dftype_info ( & self ) -> Result < Vec < DfTypeInfo > , Error > {
4343 let url = format ! ( "{}/wiki/Devil_Fruit" , self . base_url) ;
4444 let html = self . fetcher . fetch ( & url) . await ?;
@@ -60,11 +60,7 @@ impl DfScrapable for DfScraper {
6060 . map ( |row| {
6161 let cells = row. select ( & td_selector) . collect_vec ( ) ;
6262 if cells. len ( ) < 3 {
63- let msg = format ! (
64- "Expected at least 3 cells, found {}: {:?}" ,
65- cells. len( ) ,
66- row. html( )
67- ) ;
63+ let msg = format ! ( "Expected at least 3 cells, found {}" , cells. len( ) ) ;
6864 return Err ( Error :: InvalidStructure ( msg) ) ;
6965 }
7066
@@ -86,12 +82,7 @@ impl DfScrapable for DfScraper {
8682 DfType :: Logia => l_desc. trim ( ) ,
8783 _ => "" ,
8884 } ;
89- let obj = DfTypeInfo {
90- df_type,
91- cannon_count : cc,
92- non_cannon_count : ncc,
93- description : desc. to_string ( ) ,
94- } ;
85+ let obj = DfTypeInfo :: new ( df_type, cc, ncc, desc. to_string ( ) ) ;
9586 info ! ( "obj: {}" , & obj) ;
9687 Ok ( obj)
9788 } )
@@ -116,7 +107,6 @@ impl DfScrapable for DfScraper {
116107 devil_fruits_map. insert ( df_url. clone ( ) , df) ;
117108
118109 let fetcher = self . fetcher . clone ( ) ;
119-
120110 pic_tasks. spawn ( async move {
121111 let html = fetcher. fetch_only ( & df_url) . await ?;
122112 let doc = Html :: parse_document ( & html) ;
@@ -144,3 +134,133 @@ impl DfScrapable for DfScraper {
144134 Ok ( devil_fruits_map. into_values ( ) . sorted ( ) . collect_vec ( ) )
145135 }
146136}
137+
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use async_trait::async_trait;

    use crate::{
        df::scraper::{DfScrapable, DfScraper},
        fetcher::{FetchHtml, HtmlFetcher},
        types::Error,
    };

    /// Test double for the HTTP client: serves canned responses keyed by URL.
    #[derive(Clone)]
    struct MockClient {
        res_req: HashMap<String, Result<String, Error>>,
    }

    #[async_trait]
    impl FetchHtml for MockClient {
        async fn fetch(&self, url: &str) -> Result<String, Error> {
            // Panic with the offending URL (instead of a bare `unwrap`) so a
            // missing fixture is easy to diagnose from the test output.
            self.res_req
                .get(url)
                .cloned()
                .unwrap_or_else(|| panic!("no mock response registered for URL: {url}"))
        }
    }

    /// Builds an [`HtmlFetcher`] backed by a [`MockClient`] preloaded with `arr`.
    fn prepare_fetcher<const N: usize>(
        arr: [(String, Result<String, Error>); N],
    ) -> HtmlFetcher<MockClient> {
        let client = MockClient {
            res_req: HashMap::from(arr),
        };
        HtmlFetcher::new(client)
    }

    #[tokio::test]
    async fn get_type_info() {
        // The stray spaces in the count cells ("94 ", "7 ", " 62", "16 ")
        // deliberately exercise the parser's trimming.
        let fetcher = prepare_fetcher([(
            "/wiki/Devil_Fruit".to_string(),
            Ok(r#"<html><body>
<h4><span id="Paramecia">Paramecia</span></h4>
<p>Paramecia Text</p>
<h4><span id="Zoan">Zoan</span></h4>
<p>Zoan Text</p>
<h4><span id="Logia">Logia</span></h4>
<p>Logia Text</p>
<table class="wikitable">
<tbody>
<tr><th></th><th>Canon</th><th>Non-Canon</th><th>Total</th></tr>
<tr><td>Paramecia</td><td>94 </td><td>48</td><td>142</td></tr>
<tr><td>Zoan</td><td>55</td><td>7 </td><td> 62</td></tr>
<tr><td>Logia</td><td>13</td><td>3</td><td>16 </td></tr>
<tr><td>Undetermined</td><td>3</td><td>2</td><td>5</td></tr>
<tr><td>Last</td><td></td><td></td><td></td></tr>
</tbody></table>
</body></html>"#
            .to_string()),
        )]);
        let scrape = DfScraper::new(fetcher, "");
        let result = scrape.get_dftype_info().await;
        assert!(result.is_ok());
        assert_eq!(result.unwrap().len(), 4);
    }

    #[tokio::test]
    async fn get_df_list() {
        let fetcher = prepare_fetcher([
            (
                "/wiki/Logia".to_string(),
                Ok(r#"<html><body>
<h4><span id="Logia-Types">Logia</span></h4>
<dl></dl>
<h3>Some Text</h3>
<dl></dl>
<ul>
<li><a href="/dfpath_logia" title="Some Logia Df">Some Logia Devil Fruit</a> (<i>English versions: Some En Logia Df</i>): Some description. Eaten by <a href="/character">Df User</a>.</li>
</ul>
</body></html>"#
                .to_string()),
            ),
            (
                "/wiki/Paramecia".to_string(),
                Ok(r#"<html><body>
<h4><span id="Paramecia-Type_Fruits">Paramecia</span></h4>
<dl></dl>
<h3>Some Text</h3>
<dl></dl>
<ul>
<li><a href="/dfpath_paramecia" title="Some Paramecia Df">Some Paramecia Devil Fruit</a> (<i>English versions: Some En Paramecia Df</i>): Some description. Eaten by <a href="/character">Df User</a>.</li>
</ul>
</body></html>"#
                .to_string()),
            ),
            (
                "/wiki/Zoan".to_string(),
                Ok(r#"<html><body>
<h4><span id="Ancient_Zoan">Ancient Zoan</span></h4>
<dl></dl>
<p>wow</p>
<ul>
<li><a href="/dfpath-ancient-zoan" title="Some Ancient Zoan">Some ancient zoan</a>: Eaten by <a href="/characterx" title="X">X</a>.</li>
</ul>
<h4><span id="Mythical_Zoan">Mythical Zoan</span></h4>
<dl></dl>
<p>wow</p>
<ul>
<li><a href="/dfpath-mythical-zoan" title="Some Mythical Zoan">Some mythical zoan</a>: Eaten by <a href="/characterx" title="X">X</a>.</li>
</ul>
<h4><span id="List_of_Zoan-Type_Fruits">Zoan</span></h4>
<h3>Some Text</h3>
<dl></dl>
<ul>
<li><a href="/dfpath-zoan" title="Some Zoan Df">Some Zoan Devil Fruit</a> (<i>English versions: Some En Zoan Df</i>): Some description. Eaten by <a href="/character">Df User</a>.</li>
<li><a href="/dfpath-ancient-zoan" title="Some Ancient Zoan">Some ancient zoan</a> (<i>English versions: Some En Ancient Zoan Df</i>): Some description. Eaten by <a href="/characterx">X</a>.</li>
</ul>
</body></html>"#
                .to_string()),
            ),
            (
                "/dfpath-zoan".to_string(),
                Ok(r#"<html><body>
<aside><figure class="pi-image piapia"><a href="/picurl" class="image"><img src="/picurlsrc"/></a></figure></aside>
</body></html>"#
                .to_string()),
            ),
        ]);
        let scrape = DfScraper::new(fetcher, "");
        let df_list = scrape.get_df_list().await.unwrap();
        // One Logia + one Paramecia + zoan list entries (ancient entry deduped
        // by URL into the map) => 4 distinct fruits.
        assert_eq!(df_list.len(), 4);
    }
}