1
+ use reqwest:: { Client , Request } ;
2
+ use reqwest:: { Method , Error } ;
3
+ use reqwest:: header:: HeaderValue ;
4
+ use url:: { Origin , Url } ;
5
+ use reqwest:: header:: USER_AGENT ;
6
+ use crate :: http:: { RobotsTxtClient , DEFAULT_USER_AGENT } ;
7
+ use crate :: parser:: { ParseResult , parse_fetched_robots_txt} ;
8
+ use crate :: model:: FetchedRobotsTxt ;
9
+ use std:: pin:: Pin ;
10
+ use futures:: task:: { Context , Poll } ;
11
+ use futures:: Future ;
12
+ use futures:: future:: TryFutureExt ;
13
+ use futures:: future:: ok as future_ok;
14
+
15
+ type FetchFuture = Box < dyn Future < Output =Result < ( ResponseInfo , String ) , Error > > > ;
16
+
17
+ impl RobotsTxtClient for Client {
18
+ type Result = RobotsTxtResponse ;
19
+ fn fetch_robots_txt ( & self , origin : Origin ) -> Self :: Result {
20
+ let url = format ! ( "{}/robots.txt" , origin. unicode_serialization( ) ) ;
21
+ let url = Url :: parse ( & url) . expect ( "Unable to parse robots.txt url" ) ;
22
+ let mut request = Request :: new ( Method :: GET , url) ;
23
+ let _ = request. headers_mut ( ) . insert ( USER_AGENT , HeaderValue :: from_static ( DEFAULT_USER_AGENT ) ) ;
24
+ let response = self
25
+ . execute ( request)
26
+ . and_then ( |response| {
27
+ let response_info = ResponseInfo { status_code : response. status ( ) . as_u16 ( ) } ;
28
+ return response. text ( ) . and_then ( |response_text| {
29
+ return future_ok ( ( response_info, response_text) ) ;
30
+ } ) ;
31
+ } ) ;
32
+ let response: Pin < Box < dyn Future < Output =Result < ( ResponseInfo , String ) , Error > > > > = Box :: pin ( response) ;
33
+ return RobotsTxtResponse {
34
+ origin,
35
+ response,
36
+ }
37
+ }
38
+ }
39
+
40
/// Metadata of an HTTP response that is kept next to its body text.
struct ResponseInfo {
    /// Numeric HTTP status code of the response.
    status_code: u16,
}
43
+
44
+ /// Future for fetching robots.txt result.
45
+ pub struct RobotsTxtResponse {
46
+ origin : Origin ,
47
+ response : Pin < FetchFuture > ,
48
+ }
49
+
50
+ impl RobotsTxtResponse {
51
+ /// Returns origin of robots.txt
52
+ pub fn get_origin ( & self ) -> & Origin {
53
+ return & self . origin ;
54
+ }
55
+ }
56
+
57
+ impl Future for RobotsTxtResponse {
58
+ type Output = Result < ParseResult < FetchedRobotsTxt > , Error > ;
59
+
60
+ fn poll ( self : Pin < & mut Self > , cx : & mut Context ) -> Poll < Self :: Output > {
61
+ let self_mut = self . get_mut ( ) ;
62
+ let response_pin = self_mut. response . as_mut ( ) ;
63
+ match response_pin. poll ( cx) {
64
+ Poll :: Ready ( Ok ( ( response_info, text) ) ) => {
65
+ let robots_txt = parse_fetched_robots_txt ( self_mut. origin . clone ( ) , response_info. status_code , & text) ;
66
+ return Poll :: Ready ( Ok ( robots_txt) ) ;
67
+ } ,
68
+ Poll :: Ready ( Err ( error) ) => {
69
+ return Poll :: Ready ( Err ( error) ) ;
70
+ } ,
71
+ Poll :: Pending => {
72
+ return Poll :: Pending ;
73
+ } ,
74
+ }
75
+ }
76
+ }
0 commit comments