11include ! ( concat!( env!( "OUT_DIR" ) , "/bad_websites.rs" ) ) ;
22
/// Extract the host portion of a URL-like string.
///
/// Leading `https://` and `http://` prefixes are removed, then everything
/// from the first `/`, `?` or `#` onwards (path, query string, fragment) is
/// dropped. The returned slice borrows from `url`; nothing is allocated.
///
/// Returns `None` when no host remains, e.g. for `""` or `"https://"`.
/// A port or userinfo component, if present, is left in the returned slice.
pub fn get_host_from_url(url: &str) -> Option<&str> {
    // Prefix matching is case-sensitive and strips repeated occurrences.
    let rest = url
        .trim_start_matches("https://")
        .trim_start_matches("http://");

    // The host ends at the first path, query or fragment delimiter. All three
    // delimiters are ASCII, so the byte index is always a valid char boundary.
    let end = rest
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(rest.len());
    let host = &rest[..end];

    if host.is_empty() {
        None
    } else {
        Some(host)
    }
}
15+
316/// The url is in the bad list.
417pub fn is_bad_website_url ( host : & str ) -> bool {
518 BAD_WEBSITES . contains ( & host)
@@ -20,6 +33,42 @@ pub fn is_gambling_website_url(host: &str) -> bool {
2033 GAMBLING_WEBSITES . contains ( & host)
2134}
2235
36+ /// The url is in the bad list removing the url http(s):// and paths.
37+ pub fn is_bad_website_url_clean ( host : & str ) -> bool {
38+ if let Some ( host) = get_host_from_url ( host) {
39+ BAD_WEBSITES . contains ( & host)
40+ } else {
41+ false
42+ }
43+ }
44+
45+ /// The url is in the ads list.
46+ pub fn is_ad_website_url_clean ( host : & str ) -> bool {
47+ if let Some ( host) = get_host_from_url ( host) {
48+ ADS_WEBSITES . contains ( & host)
49+ } else {
50+ false
51+ }
52+ }
53+
54+ /// The url is in the tracking list.
55+ pub fn is_tracking_website_url_clean ( host : & str ) -> bool {
56+ if let Some ( host) = get_host_from_url ( host) {
57+ TRACKING_WEBSITES . contains ( & host)
58+ } else {
59+ false
60+ }
61+ }
62+
63+ /// The url is in the gambling list.
64+ pub fn is_gambling_website_url ( host : & str ) -> bool {
65+ if let Some ( host) = get_host_from_url ( host) {
66+ GAMBLING_WEBSITES . contains ( & host)
67+ } else {
68+ false
69+ }
70+ }
71+
2372#[ cfg( test) ]
2473mod tests {
2574 use super :: * ;
0 commit comments