@@ -11,11 +11,8 @@ import (
1111 "os"
1212 "strconv"
1313 "strings"
14- "sync"
15- "time"
1614
1715 "github.com/PuerkitoBio/goquery"
18- "github.com/wabarc/helper"
1916 "github.com/wabarc/logger"
2017)
2118
4239 scheme = "http"
4340 onion = "archiveiya74codqgiixo33q62qlrqtkgmcitqx5u2oeqnmn5bpcbiyd.onion" // archivecaslytosk.onion
4441 cookie = ""
45- timeout = 120 * time .Second
4642 domains = []string {
4743 "archive.today" ,
4844 "archive.is" ,
@@ -62,123 +58,67 @@ func init() {
6258}
6359
6460// Wayback is the handle of saving webpages to archive.is
65- func (wbrc * Archiver ) Wayback (links []string ) (map [string ]string , error ) {
66- collects , results := make (map [string ]string ), make (map [string ]string )
67- for _ , link := range links {
68- if helper .IsURL (link ) {
69- collects [link ] = link
70- }
71- }
72- if len (collects ) == 0 {
73- return results , fmt .Errorf ("Not found" )
74- }
75-
76- ctx , cancel := context .WithCancel (context .Background ())
77- defer cancel ()
61+ func (wbrc * Archiver ) Wayback (ctx context.Context , in * url.URL ) (dst string , err error ) {
7862 torClient , t , err := newTorClient (ctx )
79- defer closeTor (t )
63+ defer closeTor (t ) // nolint:errcheck
8064 if err != nil {
8165 logger .Error ("%v" , err )
8266 }
8367
8468 is := & IS {
8569 wbrc : wbrc ,
86- httpClient : & http.Client {Timeout : timeout , CheckRedirect : noRedirect },
70+ httpClient : & http.Client {CheckRedirect : noRedirect },
8771 torClient : torClient ,
8872 }
8973
90- ch := make (chan string , len (collects ))
91- defer close (ch )
92-
93- var mu sync.Mutex
94- var wg sync.WaitGroup
95- for _ , link := range collects {
96- wg .Add (1 )
97- go func (link string ) {
98- mu .Lock ()
99- is .submitid = ""
100- is .archive (link , ch )
101- results [link ] = strings .Replace (<- ch , onion , "archive.today" , 1 )
102- mu .Unlock ()
103- wg .Done ()
104- }(link )
105- }
106- wg .Wait ()
107-
108- if len (results ) == 0 {
109- return results , fmt .Errorf ("No results" )
74+ dst , err = is .archive (ctx , in )
75+ if err != nil {
76+ return
11077 }
78+ dst = strings .Replace (dst , onion , "archive.today" , 1 )
11179
112- return results , nil
80+ return
11381}
11482
11583// Playback handle searching archived webpages from archive.is
116- func (wbrc * Archiver ) Playback (links []string ) (map [string ]string , error ) {
117- collects , results := make (map [string ]string ), make (map [string ]string )
118- for _ , link := range links {
119- if helper .IsURL (link ) {
120- collects [link ] = link
121- }
122- }
123- if len (collects ) == 0 {
124- return results , fmt .Errorf ("Not found" )
125- }
126-
127- ctx , cancel := context .WithCancel (context .Background ())
128- defer cancel ()
84+ func (wbrc * Archiver ) Playback (ctx context.Context , in * url.URL ) (dst string , err error ) {
12985 torClient , t , err := newTorClient (ctx )
130- defer closeTor (t )
86+ defer closeTor (t ) // nolint:errcheck
13187 if err != nil {
13288 logger .Error ("%v" , err )
13389 }
13490
13591 is := & IS {
13692 wbrc : wbrc ,
137- httpClient : & http.Client {Timeout : timeout , CheckRedirect : noRedirect },
93+ httpClient : & http.Client {CheckRedirect : noRedirect },
13894 torClient : torClient ,
13995 }
14096
141- ch := make (chan string , len (collects ))
142- defer close (ch )
143-
144- var mu sync.Mutex
145- var wg sync.WaitGroup
146- for _ , link := range collects {
147- wg .Add (1 )
148- go func (link string ) {
149- mu .Lock ()
150- is .submitid = ""
151- is .search (link , ch )
152- results [link ] = strings .Replace (<- ch , onion , "archive.today" , 1 )
153- mu .Unlock ()
154- wg .Done ()
155- }(link )
156- }
157- wg .Wait ()
158-
159- if len (results ) == 0 {
160- return results , fmt .Errorf ("No results" )
97+ dst , err = is .search (ctx , in )
98+ if err != nil {
99+ return
161100 }
101+ dst = strings .Replace (dst , onion , "archive.today" , 1 )
162102
163- return results , nil
103+ return
164104}
165- func (is * IS ) archive (uri string , ch chan <- string ) {
105+ func (is * IS ) archive (ctx context. Context , u * url. URL ) ( string , error ) {
166106 endpoint , err := is .getValidDomain ()
167107 if err != nil {
168- ch <- fmt .Sprint ("archive.today is unavailable." )
169- return
108+ return "" , fmt .Errorf ("archive.today is unavailable." )
170109 }
171110
172111 if is .wbrc .Anyway != "" {
173112 anyway = is .wbrc .Anyway
174113 }
114+ uri := u .String ()
175115 data := url.Values {
176116 "submitid" : {is .submitid },
177117 "anyway" : {anyway },
178118 "url" : {uri },
179119 }
180120 domain := endpoint .String ()
181- req , err := http .NewRequest ( "POST" , domain + "/submit/" , strings .NewReader (data .Encode ()))
121+ req , _ := http .NewRequestWithContext ( ctx , http . MethodPost , domain + "/submit/" , strings .NewReader (data .Encode ()))
182122 req .Header .Add ("Content-Type" , "application/x-www-form-urlencoded" )
183123 req .Header .Add ("Content-Length" , strconv .Itoa (len (data .Encode ())))
184124 req .Header .Add ("User-Agent" , userAgent )
@@ -188,46 +128,40 @@ func (is *IS) archive(uri string, ch chan<- string) {
188128 req .Header .Add ("Cookie" , is .getCookie ())
189129 resp , err := is .httpClient .Do (req )
190130 if err != nil {
191- ch <- fmt .Sprint (err )
192- return
131+ return "" , err
193132 }
194133 defer resp .Body .Close ()
195134
196135 code := resp .StatusCode / 100
197136 if code == 1 || code == 4 || code == 5 {
198137 final := fmt .Sprintf ("%s?url=%s" , domain , uri )
199- ch <- final
200- return
138+ return final , nil
201139 }
202140
203141 _ , err = io .Copy (ioutil .Discard , resp .Body )
204142 if err != nil {
205- ch <- fmt .Sprint (err )
206- return
143+ return "" , err
207144 }
208145
209146 // When use anyway parameter.
210147 refresh := resp .Header .Get ("Refresh" )
211148 if len (refresh ) > 0 {
212149 r := strings .Split (refresh , ";url=" )
213150 if len (r ) == 2 {
214- ch <- r [1 ]
215- return
151+ return r [1 ], nil
216152 }
217153 }
218154 loc := resp .Header .Get ("location" )
219155 if len (loc ) > 2 {
220- ch <- loc
221- return
156+ return loc , nil
222157 }
223158 // Redirect to final url if page saved.
224159 final := resp .Request .URL .String ()
225- if len (final ) > 0 && strings .Contains (final , "/submit/" ) == false {
226- ch <- final
227- return
160+ if len (final ) > 0 && ! strings .Contains (final , "/submit/" ) {
161+ return final , nil
228162 }
229163
230- ch <- fmt .Sprintf ("%s/timegate/%s" , domain , uri )
164+ return fmt .Sprintf ("%s/timegate/%s" , domain , uri ), nil
231165}
232166
233167func noRedirect (req * http.Request , via []* http.Request ) error {
@@ -248,12 +182,12 @@ func (is *IS) getCookie() string {
248182}
249183
250184func (is * IS ) getSubmitID (url string ) (string , error ) {
251- if strings .Contains (url , "http" ) == false {
185+ if ! strings .Contains (url , "http" ) {
252186 return "" , fmt .Errorf ("missing protocol scheme" )
253187 }
254188
255189 r := strings .NewReader ("" )
256- req , err := http .NewRequest ("GET" , url , r )
190+ req , _ := http .NewRequest ("GET" , url , r )
257191 req .Header .Add ("Content-Type" , "application/x-www-form-urlencoded" )
258192 req .Header .Add ("User-Agent" , userAgent )
259193 req .Header .Add ("Cookie" , is .getCookie ())
@@ -313,36 +247,36 @@ func (is *IS) getValidDomain() (*url.URL, error) {
313247 return endpoint , nil
314248}
315249
316- func (is * IS ) search (uri string , ch chan <- string ) {
250+ func (is * IS ) search (ctx context. Context , in * url. URL ) ( string , error ) {
317251 endpoint , err := is .getValidDomain ()
318252 if err != nil {
319- ch <- fmt .Sprint ("archive.today is unavailable." )
320- return
253+ return "" , fmt .Errorf ("archive.today is unavailable." )
321254 }
322255
256+ uri := in .String ()
323257 domain := endpoint .String ()
324- req , err := http .NewRequest ("GET" , fmt .Sprintf ("%s/%s" , domain , uri ), nil )
258+ req , err := http .NewRequestWithContext (ctx , http .MethodGet , fmt .Sprintf ("%s/%s" , domain , uri ), nil )
259+ if err != nil {
260+ return "" , err
261+ }
325262 req .Header .Add ("User-Agent" , userAgent )
326263 req .Header .Add ("Referer" , domain )
327264 req .Header .Add ("Host" , endpoint .Hostname ())
328265 resp , err := is .httpClient .Do (req )
329266 if err != nil {
330- ch <- fmt .Sprint (err )
331- return
267+ return "" , err
332268 }
333269 defer resp .Body .Close ()
334270
335271 doc , err := goquery .NewDocumentFromReader (resp .Body )
336272 if err != nil {
337- ch <- fmt .Sprint (err )
338- return
273+ return "" , err
339274 }
340275
341276 target , exists := doc .Find ("#row0 > .TEXT-BLOCK > a" ).Attr ("href" )
342277 if ! exists {
343- ch <- "Not found"
344- return
278+ return "" , fmt .Errorf ("Not found" )
345279 }
346280
347- ch <- target
281+ return target , nil
348282}
0 commit comments