Skip to content

Commit ecf12f1

Browse files
authored
Boost Scraper Test Coverage (#6)
* dftype info scrape unit test
* boost test coverage for df scraper
1 parent dfc11f6 commit ecf12f1

File tree

4 files changed

+173
-39
lines changed

4 files changed

+173
-39
lines changed

scraper/src/df/models.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,17 @@ pub struct DfTypeInfo {
1212
pub description: String,
1313
}
1414

15+
impl DfTypeInfo {
16+
pub fn new(df_type: DfType, cannon: u32, non_cannon: u32, description: String) -> Self {
17+
Self {
18+
df_type,
19+
cannon_count: cannon,
20+
non_cannon_count: non_cannon,
21+
description,
22+
}
23+
}
24+
}
25+
1526
impl std::fmt::Display for DfTypeInfo {
1627
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
1728
write!(
@@ -127,12 +138,6 @@ impl PartialEq for DevilFruit {
127138
}
128139
}
129140

130-
#[derive(Debug)]
131-
pub struct DfUser {
132-
pub id: String,
133-
pub name: String,
134-
}
135-
136141
#[derive(Debug)]
137142
pub struct Character {
138143
pub id: String,

scraper/src/df/parser.rs

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ pub struct CanonLogiaParser;
3131
impl DfTypeParser for CanonZoanParser {
3232
fn parse(&self, html: &Html) -> Result<Vec<DevilFruit>, Error> {
3333
let sibling_iter = html
34-
.select(&Selector::parse(&DfType::Zoan.id_for_fruit_list()).unwrap())
34+
.select(&Utils::parse_selector(&DfType::Zoan.id_for_fruit_list())?)
3535
.next()
3636
.and_then(|e| e.parent())
3737
.map(|n| n.next_siblings())
@@ -51,10 +51,7 @@ impl DfTypeParser for CanonZoanParser {
5151
&& el
5252
.first_child()
5353
.and_then(|n| ElementRef::wrap(n))
54-
.ok_or_else(|| false)
55-
.unwrap()
56-
.value()
57-
.id()
54+
.and_then(|el| el.value().id())
5855
.is_some_and(|s| s != "Canon"))
5956
})
6057
.filter(|el| el.value().name() == "ul")
@@ -88,7 +85,7 @@ macro_rules! impl_canon_paramecia_logia_parser {
8885
impl DfTypeParser for $T {
8986
fn parse(&self, html: &Html) -> Result<Vec<DevilFruit>, Error> {
9087
let fruits: Result<Vec<_>, _> = html
91-
.select(&Selector::parse(&$df_type.id_for_fruit_list()).unwrap())
88+
.select(&Utils::parse_selector(&$df_type.id_for_fruit_list())?)
9289
.next()
9390
.and_then(|e| e.parent())
9491
.map(|n| n.next_siblings())
@@ -235,8 +232,7 @@ impl Utils {
235232
let mut sub_type_map = HashMap::new();
236233

237234
for df_sub in DfSubType::iter() {
238-
let sub_type_selector = &Selector::parse(&df_sub.id_for_fruit_list())
239-
.map_err(|_| Error::InvalidStructure("Failed to parse selector".to_string()))?;
235+
let sub_type_selector = &Utils::parse_selector(&df_sub.id_for_fruit_list())?;
240236
let res: Result<(), Error> = html_doc
241237
.select(sub_type_selector)
242238
.next()
@@ -266,3 +262,16 @@ impl Utils {
266262
Ok(sub_type_map)
267263
}
268264
}
265+
266+
#[cfg(test)]
mod tests {
    use super::get_parser;
    use crate::df::types::DfType;

    /// Requesting a parser for `DfType::Undetermined` is expected to panic.
    #[test]
    #[should_panic]
    fn test_get_parser() {
        let undetermined = DfType::Undetermined;
        get_parser(&undetermined, true);
    }
}

scraper/src/df/scraper.rs

Lines changed: 144 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use itertools::Itertools;
2-
use lazy_static::lazy_static;
32
use log::{error, info};
4-
use regex::Regex;
3+
use reqwest::Client;
54
use scraper::selectable::Selectable;
65
use scraper::Html;
76
use std::collections::HashMap;
@@ -11,34 +10,35 @@ use strum::IntoEnumIterator;
1110
use super::models::{DevilFruit, DfTypeInfo};
1211
use crate::df::parser::{get_parser, Utils};
1312
use crate::df::types::DfType;
14-
use crate::fetcher::HtmlFetcher;
13+
use crate::fetcher::{FetchHtml, HtmlFetcher};
1514
use crate::types::{Error, UrlTyped};
1615

1716
pub trait DfScrapable {
1817
async fn get_dftype_info(&self) -> Result<Vec<DfTypeInfo>, Error>;
1918
async fn get_df_list(&self) -> Result<Vec<DevilFruit>, Error>;
2019
}
2120

22-
lazy_static! {
23-
static ref REX_EN_NAME: Regex = Regex::new(r"English version: (.+)").unwrap();
24-
}
25-
2621
#[derive(Debug)]
27-
pub struct DfScraper {
28-
fetcher: HtmlFetcher,
22+
pub struct DfScraper<T = Client>
23+
where
24+
T: FetchHtml + Clone,
25+
{
26+
fetcher: HtmlFetcher<T>,
2927
base_url: String,
3028
}
3129

32-
impl DfScraper {
33-
pub fn new(fetcher: HtmlFetcher, base_url: &str) -> Self {
30+
impl<T: FetchHtml + Clone> DfScraper<T> {
31+
pub fn new(fetcher: HtmlFetcher<T>, base_url: &str) -> Self {
3432
Self {
3533
fetcher,
3634
base_url: base_url.to_string(),
3735
}
3836
}
3937
}
4038

41-
impl DfScrapable for DfScraper {
39+
impl<T: FetchHtml + Clone + std::marker::Send + std::marker::Sync + 'static> DfScrapable
40+
for DfScraper<T>
41+
{
4242
async fn get_dftype_info(&self) -> Result<Vec<DfTypeInfo>, Error> {
4343
let url = format!("{}/wiki/Devil_Fruit", self.base_url);
4444
let html = self.fetcher.fetch(&url).await?;
@@ -60,11 +60,7 @@ impl DfScrapable for DfScraper {
6060
.map(|row| {
6161
let cells = row.select(&td_selector).collect_vec();
6262
if cells.len() < 3 {
63-
let msg = format!(
64-
"Expected at least 3 cells, found {}: {:?}",
65-
cells.len(),
66-
row.html()
67-
);
63+
let msg = format!("Expected at least 3 cells, found {}", cells.len());
6864
return Err(Error::InvalidStructure(msg));
6965
}
7066

@@ -86,12 +82,7 @@ impl DfScrapable for DfScraper {
8682
DfType::Logia => l_desc.trim(),
8783
_ => "",
8884
};
89-
let obj = DfTypeInfo {
90-
df_type,
91-
cannon_count: cc,
92-
non_cannon_count: ncc,
93-
description: desc.to_string(),
94-
};
85+
let obj = DfTypeInfo::new(df_type, cc, ncc, desc.to_string());
9586
info!("obj: {}", &obj);
9687
Ok(obj)
9788
})
@@ -116,7 +107,6 @@ impl DfScrapable for DfScraper {
116107
devil_fruits_map.insert(df_url.clone(), df);
117108

118109
let fetcher = self.fetcher.clone();
119-
120110
pic_tasks.spawn(async move {
121111
let html = fetcher.fetch_only(&df_url).await?;
122112
let doc = Html::parse_document(&html);
@@ -144,3 +134,133 @@ impl DfScrapable for DfScraper {
144134
Ok(devil_fruits_map.into_values().sorted().collect_vec())
145135
}
146136
}
137+
138+
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use async_trait::async_trait;

    use crate::{
        df::scraper::{DfScrapable, DfScraper},
        fetcher::{FetchHtml, HtmlFetcher},
        types::Error,
    };

    /// In-memory stand-in for an HTTP client: maps a URL to the canned
    /// response that `fetch` should hand back for it.
    #[derive(Clone)]
    struct MockClient {
        res_req: HashMap<String, Result<String, Error>>,
    }

    #[async_trait]
    impl FetchHtml for MockClient {
        /// Returns a clone of the canned response registered for `url`.
        ///
        /// # Panics
        /// Panics when no response was registered for `url`; the message names
        /// the URL so a missing fixture is easy to spot in test output.
        async fn fetch(&self, url: &str) -> Result<String, Error> {
            self.res_req
                .get(url)
                .cloned()
                .unwrap_or_else(|| panic!("MockClient: no canned response registered for {url:?}"))
        }
    }

    /// Builds an `HtmlFetcher` backed by a `MockClient` that serves the given
    /// `(url, response)` pairs.
    fn prepare_fetcher<const N: usize>(
        arr: [(String, Result<String, Error>); N],
    ) -> HtmlFetcher<MockClient> {
        let client = MockClient {
            res_req: HashMap::from(arr),
        };
        HtmlFetcher::new(client)
    }

    /// Scrapes the type-summary fixture and checks that four `DfTypeInfo`
    /// entries come back.
    #[tokio::test]
    async fn get_type_info() {
        let fetcher = prepare_fetcher([(
            "/wiki/Devil_Fruit".to_string(),
            Ok(r#"<html><body>
<h4><span id="Paramecia">Paramecia</span></h4>
<p>Paramecia Text</p>
<h4><span id="Zoan">Zoan</span></h4>
<p>Zoan Text</p>
<h4><span id="Logia">Logia</span></h4>
<p>Logia Text</p>
<table class="wikitable">
<tbody>
<tr><th></th><th>Canon</th><th>Non-Canon</th><th>Total</th></tr>
<tr><td>Paramecia</td><td>94 </td><td>48</td><td>142</td></tr>
<tr><td>Zoan</td><td>55</td><td>7 </td><td> 62</td></tr>
<tr><td>Logia</td><td>13</td><td>3</td><td>16 </td></tr>
<tr><td>Undetermined</td><td>3</td><td>2</td><td>5</td></tr>
<tr><td>Last</td><td></td><td></td><td></td></tr>
</tbody></table>
</body></html>"#
                .to_string()),
        )]);
        // An empty base URL makes the scraper request exactly the keys registered above.
        let scrape = DfScraper::new(fetcher, "");
        let infos = scrape
            .get_dftype_info()
            .await
            .expect("get_dftype_info should succeed on the fixture page");
        assert_eq!(infos.len(), 4);
    }

    /// Scrapes the Logia, Paramecia and Zoan list fixtures and checks that
    /// four devil fruits are returned.
    ///
    /// NOTE(review): only `/dfpath-zoan` has a canned detail page; if the
    /// scraper requests the other fruit URLs, the mock panics inside that
    /// task. Confirm this is the intended way to exercise the failure path.
    #[tokio::test]
    async fn get_df_list() {
        let fetcher = prepare_fetcher([
            (
                "/wiki/Logia".to_string(),
                Ok(r#"<html><body>
<h4><span id="Logia-Types">Logia</span></h4>
<dl></dl>
<h3>Some Text</h3>
<dl></dl>
<ul>
<li><a href="/dfpath_logia" title="Some Logia Df">Some Logia Devil Fruit</a> (<i>English versions: Some En Logia Df</i>): Some description. Eaten by <a href="/character">Df User</a>.</li>
</ul>
</body></html>"#
                    .to_string()),
            ),
            (
                "/wiki/Paramecia".to_string(),
                Ok(r#"<html><body>
<h4><span id="Paramecia-Type_Fruits">Paramecia</span></h4>
<dl></dl>
<h3>Some Text</h3>
<dl></dl>
<ul>
<li><a href="/dfpath_paramecia" title="Some Paramecia Df">Some Paramecia Devil Fruit</a> (<i>English versions: Some En Paramecia Df</i>): Some description. Eaten by <a href="/character">Df User</a>.</li>
</ul>
</body></html>"#
                    .to_string()),
            ),
            (
                "/wiki/Zoan".to_string(),
                Ok(r#"<html><body>
<h4><span id="Ancient_Zoan">Ancient Zoan</span></h4>
<dl></dl>
<p>wow</p>
<ul>
<li><a href="/dfpath-ancient-zoan" title="Some Ancient Zoan">Some ancient zoan</a>: Eaten by <a href="/characterx" title="X">X</a>.</li>
</ul>
<h4><span id="Mythical_Zoan">Mythical Zoan</span></h4>
<dl></dl>
<p>wow</p>
<ul>
<li><a href="/dfpath-mythical-zoan" title="Some Mythical Zoan">Some mythical zoan</a>: Eaten by <a href="/characterx" title="X">X</a>.</li>
</ul>
<h4><span id="List_of_Zoan-Type_Fruits">Zoan</span></h4>
<h3>Some Text</h3>
<dl></dl>
<ul>
<li><a href="/dfpath-zoan" title="Some Zoan Df">Some Zoan Devil Fruit</a> (<i>English versions: Some En Zoan Df</i>): Some description. Eaten by <a href="/character">Df User</a>.</li>
<li><a href="/dfpath-ancient-zoan" title="Some Ancient Zoan">Some ancient zoan</a> (<i>English versions: Some En Ancient Zoan Df</i>): Some description. Eaten by <a href="/characterx">X</a>.</li>
</ul>
</body></html>"#
                    .to_string()),
            ),
            (
                "/dfpath-zoan".to_string(),
                Ok(r#"<html><body>
<aside><figure class="pi-image piapia"><a href="/picurl" class="image"><img src="/picurlsrc"/></a></figure></aside>
</body></html>"#
                    .to_string()),
            ),
        ]);
        // An empty base URL makes the scraper request exactly the keys registered above.
        let scrape = DfScraper::new(fetcher, "");
        let df_list = scrape.get_df_list().await.unwrap();
        assert_eq!(df_list.len(), 4);
    }
}

scraper/src/fetcher.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ impl FetchHtml for Client {
2727
}
2828

2929
#[derive(Debug, Clone)]
30-
pub struct HtmlFetcher<T = Client>
30+
pub struct HtmlFetcher<T>
3131
where
3232
T: FetchHtml,
3333
{

0 commit comments

Comments
 (0)