Skip to content

Commit 3f49b34

Browse files
committed
chore(utils): add shorthand domain removals
1 parent 4dc9271 commit 3f49b34

File tree

1 file changed

+49
-0
lines changed

1 file changed

+49
-0
lines changed

src/lib.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
include!(concat!(env!("OUT_DIR"), "/bad_websites.rs"));
22

3+
/// Get the hostname from a url.
///
/// A single leading `https://` or `http://` scheme is removed and the
/// remainder is cut at the first `/`, `?` or `#`, so paths, query strings and
/// fragments are never part of the returned host. A port or userinfo section,
/// if present, is left in place.
///
/// This implementation always returns `Some`; for an empty input, or a url
/// with nothing after the scheme, the host is the empty string.
pub fn get_host_from_url(url: &str) -> Option<&str> {
    // `strip_prefix` removes the scheme at most once, unlike
    // `trim_start_matches`, which keeps stripping repeated prefixes.
    let rest = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
        .unwrap_or(url);

    // The host ends at the first path, query or fragment delimiter.
    let end = rest
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(rest.len());

    Some(&rest[..end])
}
15+
316
/// The url is in the bad list.
417
pub fn is_bad_website_url(host: &str) -> bool {
518
BAD_WEBSITES.contains(&host)
@@ -20,6 +33,42 @@ pub fn is_gambling_website_url(host: &str) -> bool {
2033
GAMBLING_WEBSITES.contains(&host)
2134
}
2235

36+
/// The url is in the bad list removing the url http(s):// and paths.
37+
pub fn is_bad_website_url_clean(host: &str) -> bool {
38+
if let Some(host) = get_host_from_url(host) {
39+
BAD_WEBSITES.contains(&host)
40+
} else {
41+
false
42+
}
43+
}
44+
45+
/// The url is in the ads list.
46+
pub fn is_ad_website_url_clean(host: &str) -> bool {
47+
if let Some(host) = get_host_from_url(host) {
48+
ADS_WEBSITES.contains(&host)
49+
} else {
50+
false
51+
}
52+
}
53+
54+
/// The url is in the tracking list.
55+
pub fn is_tracking_website_url_clean(host: &str) -> bool {
56+
if let Some(host) = get_host_from_url(host) {
57+
TRACKING_WEBSITES.contains(&host)
58+
} else {
59+
false
60+
}
61+
}
62+
63+
/// The url is in the gambling list.
64+
pub fn is_gambling_website_url(host: &str) -> bool {
65+
if let Some(host) = get_host_from_url(host) {
66+
GAMBLING_WEBSITES.contains(&host)
67+
} else {
68+
false
69+
}
70+
}
71+
2372
#[cfg(test)]
2473
mod tests {
2574
use super::*;

0 commit comments

Comments
 (0)