Skip to content

Commit 15cdbe6

Browse files
committed
Improve tor connection
1 parent 4ad567f commit 15cdbe6

File tree

5 files changed

+161
-67
lines changed

5 files changed

+161
-67
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,16 @@ func main() {
5858
// https://www.bbc.com => https://archive.li/HjqQV
5959
```
6060

61+
### Access Tor Hidden Service
62+
63+
[archive.today](https://archive.today) provides a [Tor Hidden Service](http://archivecaslytosk.onion/) for saving webpages, and accessing the
64+
Tor Hidden Service is preferred; <http://archive.today> is accessed if the Tor Hidden Service is unavailable.
65+
66+
By default, the program dials a proxy using the Tor SOCKS address `127.0.0.1:9050`;
67+
set `TOR_HOST` and `TOR_SOCKS_PORT` to specify a different host and port.
68+
69+
If dialing the SOCKS proxy fails, it looks up the tor executable file and starts it to dial the proxy.
70+
6171
## FAQ
6272

6373
### archive.today is unavailable?

go.mod

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@ go 1.15
44

55
require (
66
github.com/PuerkitoBio/goquery v1.6.1
7+
github.com/andybalholm/cascadia v1.2.0 // indirect
78
github.com/cretz/bine v0.1.0
89
github.com/stretchr/testify v1.7.0 // indirect
9-
github.com/wabarc/helper v0.0.0-20210127120855-10af37cc2616
10+
github.com/wabarc/helper v0.0.0-20210407153720-1bfe98b427fe
1011
github.com/wabarc/logger v0.0.0-20210417045349-d0d82e8e99ee
12+
golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc // indirect
13+
golang.org/x/net v0.0.0-20210415231046-e915ea6b2b7d
14+
golang.org/x/sys v0.0.0-20210415045647-66c3f260301c // indirect
1115
)

go.sum

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,51 @@
11
github.com/PuerkitoBio/goquery v1.6.1 h1:FgjbQZKl5HTmcn4sKBgvx8vv63nhyhIpv7lJpFGCWpk=
22
github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
3-
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
43
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
4+
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
5+
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
56
github.com/cretz/bine v0.1.0 h1:1/fvhLE+fk0bPzjdO5Ci+0ComYxEMuB1JhM4X5skT3g=
67
github.com/cretz/bine v0.1.0/go.mod h1:6PF6fWAvYtwjRGkAuDEJeWNOv3a2hUouSP/yRYXmvHw=
78
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
89
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
10+
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
11+
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
12+
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
13+
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
14+
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
915
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
1016
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
17+
github.com/rogpeppe/go-internal v1.5.2/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
1118
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
1219
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
1320
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
14-
github.com/wabarc/helper v0.0.0-20210127120855-10af37cc2616 h1:wZ5HtpmZAVUq0Im5Sm92ycJrTeLJk5lB/Kvh55Rd+Ps=
15-
github.com/wabarc/helper v0.0.0-20210127120855-10af37cc2616/go.mod h1:N9P4r7Rn46p4nkWtXV6ztN3p5ACVnp++bgfwjTqSxQ8=
21+
github.com/wabarc/helper v0.0.0-20210407153720-1bfe98b427fe h1:V9yz2vQlSVLs51nlo0DAeETFOE57OvlYm98X1LKJA6U=
22+
github.com/wabarc/helper v0.0.0-20210407153720-1bfe98b427fe/go.mod h1:TuTZtoiOu984UWOf7FfX58JllKMjq7FCz701kB5W88E=
1623
github.com/wabarc/logger v0.0.0-20210417045349-d0d82e8e99ee h1:MMIp++7eem2CI1jIYDoPByMwXeZAjsFo2ciBNtvhB80=
1724
github.com/wabarc/logger v0.0.0-20210417045349-d0d82e8e99ee/go.mod h1:4uYr9fnQaQoDk1ttTzLnSB3lZm3i/vrJwN8EZIB2YuI=
18-
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M=
1925
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
26+
golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc h1:+q90ECDSAQirdykUN6sPEiBXBsp8Csjcca8Oy7bgLTA=
27+
golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
2028
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
21-
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
2229
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
23-
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU=
30+
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
31+
golang.org/x/net v0.0.0-20210415231046-e915ea6b2b7d h1:BgJvlyh+UqCUaPlscHJ+PN8GcpfrFdr7NHjd1JL0+Gs=
32+
golang.org/x/net v0.0.0-20210415231046-e915ea6b2b7d/go.mod h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8=
2433
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
34+
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
35+
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
36+
golang.org/x/sys v0.0.0-20210415045647-66c3f260301c h1:6L+uOeS3OQt/f4eFHXZcTxeZrGCuz+CLElgEBjbcTA4=
37+
golang.org/x/sys v0.0.0-20210415045647-66c3f260301c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
38+
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
2539
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
26-
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
40+
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
41+
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
42+
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
2743
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
44+
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
45+
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
46+
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
47+
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
2848
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
2949
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
50+
mvdan.cc/xurls/v2 v2.2.0 h1:NSZPykBXJFCetGZykLAxaL6SIpvbVy/UFEniIfHAa8A=
51+
mvdan.cc/xurls/v2 v2.2.0/go.mod h1:EV1RMtya9D6G5DMYPGD8zTQzaHet6Jh8gFlRgGRJeO8=

is.go

Lines changed: 36 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ var (
5555
}
5656
)
5757

58+
func init() {
59+
if os.Getenv("DEBUG") != "" {
60+
logger.EnableDebug()
61+
}
62+
}
63+
5864
// Wayback is the handle of saving webpages to archive.is
5965
func (wbrc *Archiver) Wayback(links []string) (map[string]string, error) {
6066
collects, results := make(map[string]string), make(map[string]string)
@@ -66,11 +72,16 @@ func (wbrc *Archiver) Wayback(links []string) (map[string]string, error) {
6672
collects[link] = link
6773
}
6874

69-
torClient, tor, err := newTorClient()
75+
done := make(chan bool, 1)
76+
torClient, err := newTorClient(done)
7077
if err != nil {
7178
logger.Error("%v", err)
72-
} else {
73-
defer tor.Close()
79+
}
80+
81+
is := &IS{
82+
wbrc: wbrc,
83+
httpClient: &http.Client{Timeout: timeout, CheckRedirect: noRedirect},
84+
torClient: torClient,
7485
}
7586

7687
ch := make(chan string, len(collects))
@@ -81,13 +92,8 @@ func (wbrc *Archiver) Wayback(links []string) (map[string]string, error) {
8192
for _, link := range collects {
8293
wg.Add(1)
8394
go func(link string) {
84-
is := &IS{
85-
wbrc: wbrc,
86-
httpClient: &http.Client{Timeout: timeout, CheckRedirect: noRedirect},
87-
torClient: torClient,
88-
submitid: "",
89-
}
9095
mu.Lock()
96+
is.submitid = ""
9197
is.archive(link, ch)
9298
results[link] = strings.Replace(<-ch, onion, "archive.today", 1)
9399
mu.Unlock()
@@ -96,6 +102,9 @@ func (wbrc *Archiver) Wayback(links []string) (map[string]string, error) {
96102
}
97103
wg.Wait()
98104

105+
// Close tor connection
106+
done <- true
107+
99108
if len(results) == 0 {
100109
return results, fmt.Errorf("No results")
101110
}
@@ -115,11 +124,16 @@ func (wbrc *Archiver) Playback(links []string) (map[string]string, error) {
115124
return results, fmt.Errorf("No found URL")
116125
}
117126

118-
torClient, tor, err := newTorClient()
127+
done := make(chan bool, 1)
128+
torClient, err := newTorClient(done)
119129
if err != nil {
120130
logger.Error("%v", err)
121-
} else {
122-
defer tor.Close()
131+
}
132+
133+
is := &IS{
134+
wbrc: wbrc,
135+
httpClient: &http.Client{Timeout: timeout, CheckRedirect: noRedirect},
136+
torClient: torClient,
123137
}
124138

125139
ch := make(chan string, len(collects))
@@ -130,13 +144,8 @@ func (wbrc *Archiver) Playback(links []string) (map[string]string, error) {
130144
for _, link := range collects {
131145
wg.Add(1)
132146
go func(link string) {
133-
is := &IS{
134-
wbrc: wbrc,
135-
httpClient: &http.Client{Timeout: timeout, CheckRedirect: noRedirect},
136-
torClient: torClient,
137-
submitid: "",
138-
}
139147
mu.Lock()
148+
is.submitid = ""
140149
is.search(link, ch)
141150
results[link] = strings.Replace(<-ch, onion, "archive.today", 1)
142151
mu.Unlock()
@@ -195,6 +204,15 @@ func (is *IS) archive(uri string, ch chan<- string) {
195204
return
196205
}
197206

207+
// When use anyway parameter.
208+
refresh := resp.Header.Get("Refresh")
209+
if len(refresh) > 0 {
210+
r := strings.Split(refresh, ";url=")
211+
if len(r) == 2 {
212+
ch <- r[1]
213+
return
214+
}
215+
}
198216
loc := resp.Header.Get("location")
199217
if len(loc) > 2 {
200218
ch <- loc
@@ -206,15 +224,6 @@ func (is *IS) archive(uri string, ch chan<- string) {
206224
ch <- final
207225
return
208226
}
209-
// When use anyway parameter.
210-
refresh := resp.Header.Get("refresh")
211-
if len(refresh) > 0 {
212-
r := strings.Split(refresh, ";url=")
213-
if len(r) == 2 {
214-
ch <- r[1]
215-
return
216-
}
217-
}
218227

219228
ch <- fmt.Sprintf("%s/timegate/%s", domain, uri)
220229
}

tor.go

Lines changed: 81 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,51 +4,76 @@ import (
44
"context"
55
"crypto/tls"
66
"fmt"
7-
// "net"
7+
"net"
88
"net/http"
99
"os"
1010
"os/exec"
1111
"runtime"
1212
"time"
1313

1414
"github.com/cretz/bine/tor"
15-
// "golang.org/x/net/proxy"
15+
"github.com/wabarc/logger"
16+
"golang.org/x/net/proxy"
1617
)
1718

18-
func newTorClient() (*http.Client, *tor.Tor, error) {
19-
// Lookup tor executable file
20-
if _, err := exec.LookPath("tor"); err != nil {
21-
return nil, nil, fmt.Errorf("%w", err)
22-
}
19+
func newTorClient(done <-chan bool) (*http.Client, error) {
20+
var dialer proxy.ContextDialer
21+
if useProxy() {
22+
// Create a socks5 dialer
23+
pxy, err := proxy.SOCKS5("tcp", "127.0.0.1:9050", nil, proxy.Direct)
24+
if err != nil {
25+
return nil, fmt.Errorf("Can't connect to the proxy: %w", err)
26+
}
2327

24-
// Start tor with default config
25-
startConf := &tor.StartConf{TempDataDirBase: os.TempDir()}
26-
t, err := tor.Start(nil, startConf)
27-
if err != nil {
28-
return nil, nil, fmt.Errorf("Make connection failed: %w", err)
29-
}
30-
// defer t.Close()
28+
dialer = pxy.(interface {
29+
DialContext(ctx context.Context, network, addr string) (net.Conn, error)
30+
})
31+
} else {
32+
// Lookup tor executable file
33+
if _, err := exec.LookPath("tor"); err != nil {
34+
return nil, fmt.Errorf("%w", err)
35+
}
3136

32-
// Wait at most a minute to start network and get
33-
dialCtx, dialCancel := context.WithTimeout(context.Background(), time.Minute)
34-
defer dialCancel()
37+
// Start tor with default config
38+
startConf := &tor.StartConf{TempDataDirBase: os.TempDir()}
39+
t, err := tor.Start(nil, startConf)
40+
if err != nil {
41+
return nil, fmt.Errorf("Make connection failed: %w", err)
42+
}
43+
// defer t.Close()
3544

36-
// Make connection
37-
dialer, err := t.Dialer(dialCtx, nil)
38-
if err != nil {
39-
t.Close()
40-
return nil, nil, fmt.Errorf("Make connection failed: %w", err)
41-
}
45+
// Wait at most a minute to start network and get
46+
dialCtx, dialCancel := context.WithTimeout(context.Background(), time.Minute)
47+
defer dialCancel()
4248

43-
// Create a socks5 dialer
44-
// pxy, err := proxy.SOCKS5("tcp", "127.0.0.1:9050", nil, proxy.Direct)
45-
// if err != nil {
46-
// return nil, fmt.Errorf("Can't connect to the proxy: %w", err)
47-
// }
49+
// Make connection
50+
dialer, err = t.Dialer(dialCtx, nil)
51+
if err != nil {
52+
t.Close()
53+
return nil, fmt.Errorf("Make connection failed: %w", err)
54+
}
4855

49-
// dialer := pxy.(interface {
50-
// DialContext(ctx context.Context, network, addr string) (net.Conn, error)
51-
// })
56+
go func() {
57+
// Auto close tor client after 10 min
58+
tick := time.NewTicker(10 * time.Minute)
59+
for {
60+
select {
61+
case <-done:
62+
logger.Debug("Closed tor client")
63+
tick.Stop()
64+
t.Close()
65+
return
66+
case <-tick.C:
67+
logger.Debug("Closed tor client, timeout")
68+
tick.Stop()
69+
t.Close()
70+
return
71+
default:
72+
logger.Debug("Waiting for close tor client")
73+
}
74+
}
75+
}()
76+
}
5277

5378
return &http.Client{
5479
Timeout: timeout,
@@ -65,5 +90,29 @@ func newTorClient() (*http.Client, *tor.Tor, error) {
6590
InsecureSkipVerify: true,
6691
},
6792
},
68-
}, t, nil
93+
}, nil
94+
}
95+
96+
func useProxy() bool {
97+
host := os.Getenv("TOR_HOST")
98+
port := os.Getenv("TOR_SOCKS_PORT")
99+
if host == "" {
100+
host = "127.0.0.1"
101+
}
102+
if port == "" {
103+
port = "9050"
104+
}
105+
106+
conn, err := net.DialTimeout("tcp", net.JoinHostPort(host, port), time.Second)
107+
if err != nil {
108+
logger.Debug("Try to connect tor proxy failed: %v", err)
109+
return false
110+
}
111+
if conn != nil {
112+
conn.Close()
113+
logger.Debug("Connected: %v", net.JoinHostPort(host, port))
114+
return true
115+
}
116+
117+
return false
69118
}

0 commit comments

Comments
 (0)