Skip to content

Commit 13dac45

Browse files
committed
Minor improvements
1 parent e9534bb commit 13dac45

File tree

6 files changed

+83
-59
lines changed

6 files changed

+83
-59
lines changed

.gitignore

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Binaries for programs and plugins
2+
*.exe
3+
*.exe~
4+
*.dll
5+
*.so
6+
*.dylib
7+
8+
# Test binary, built with `go test -c`
9+
*.test
10+
11+
# Output of the go coverage tool, specifically when used with LiteIDE
12+
*.out
13+
14+
# Dependency directories (remove the comment below to include it)
15+
# vendor/

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,4 +180,4 @@ fmt:
180180

181181
test:
182182
@echo "-> Running go test"
183-
go test -v ./...
183+
@CGO_ENABLED=1 go test -v -race -cover -coverprofile=coverage.out -covermode=atomic ./...

pkg/http.go

Lines changed: 31 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
package is
22

33
import (
4-
"context"
54
"fmt"
65
"io"
76
"io/ioutil"
8-
"net"
97
"net/http"
108
"net/url"
119
"os"
@@ -16,16 +14,13 @@ import (
1614
"github.com/PuerkitoBio/goquery"
1715
)
1816

19-
type Archiver struct {
20-
Anyway string
21-
Cookie string
17+
type IS struct {
18+
wbrc *Archiver
2219

23-
DialContext func(ctx context.Context, network, addr string) (net.Conn, error)
24-
SkipTLSVerification bool
25-
26-
final string
2720
submitid string
21+
final string
2822

23+
baseuri *url.URL
2924
httpClient *http.Client
3025
torClient *http.Client
3126
}
@@ -37,7 +32,6 @@ var (
3732
onion = "archivecaslytosk.onion" // archiveiya74codqgiixo33q62qlrqtkgmcitqx5u2oeqnmn5bpcbiyd.onion
3833
cookie = ""
3934
timeout = 120 * time.Second
40-
baseuri *url.URL
4135
domains = []string{
4236
"archive.today",
4337
"archive.is",
@@ -49,59 +43,54 @@ var (
4943
}
5044
)
5145

52-
func (wbrc *Archiver) fetch(s string, ch chan<- string) {
53-
wbrc.httpClient = &http.Client{
54-
Timeout: timeout,
55-
}
56-
46+
func (is *IS) fetch(s string, ch chan<- string) {
5747
// get valid domain and submitid
5848
r := func(domains []string) {
5949
for _, domain := range domains {
6050
h := fmt.Sprintf("%v://%v", scheme, domain)
61-
id, err := wbrc.getSubmitID(h)
51+
id, err := is.getSubmitID(h)
6252
if err != nil {
6353
continue
6454
}
65-
baseuri, _ = url.Parse(h)
66-
wbrc.submitid = id
55+
is.baseuri, _ = url.Parse(h)
56+
is.submitid = id
6757
break
6858
}
6959
}
7060

7161
// Try request over Tor hidden service.
72-
if wbrc.torClient != nil {
73-
wbrc.httpClient = wbrc.torClient
62+
if is.torClient != nil {
63+
is.httpClient = is.torClient
7464

7565
r([]string{onion})
7666
}
77-
defer wbrc.clear()
7867

79-
if baseuri == nil || wbrc.submitid == "" {
68+
if is.baseuri == nil || is.submitid == "" {
8069
r(domains)
81-
if baseuri == nil || wbrc.submitid == "" {
70+
if is.baseuri == nil || is.submitid == "" {
8271
ch <- fmt.Sprint("archive.today is unavailable.")
8372
return
8473
}
8574
}
8675

87-
if wbrc.Anyway != "" {
88-
anyway = wbrc.Anyway
76+
if is.wbrc.Anyway != "" {
77+
anyway = is.wbrc.Anyway
8978
}
9079
data := url.Values{
91-
"submitid": {wbrc.submitid},
80+
"submitid": {is.submitid},
9281
"anyway": {anyway},
9382
"url": {s},
9483
}
95-
uri := baseuri.String()
96-
req, err := http.NewRequest("POST", baseuri.String()+"/submit/", strings.NewReader(data.Encode()))
84+
uri := is.baseuri.String()
85+
req, err := http.NewRequest("POST", is.baseuri.String()+"/submit/", strings.NewReader(data.Encode()))
9786
req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
9887
req.Header.Add("Content-Length", strconv.Itoa(len(data.Encode())))
9988
req.Header.Add("User-Agent", userAgent)
10089
req.Header.Add("Referer", uri)
10190
req.Header.Add("Origin", uri)
102-
req.Header.Add("Host", baseuri.Hostname())
103-
req.Header.Add("Cookie", wbrc.getCookie())
104-
resp, err := wbrc.httpClient.Do(req)
91+
req.Header.Add("Host", is.baseuri.Hostname())
92+
req.Header.Add("Cookie", is.getCookie())
93+
resp, err := is.httpClient.Do(req)
10594
if err != nil {
10695
ch <- fmt.Sprint(err)
10796
return
@@ -124,38 +113,38 @@ func (wbrc *Archiver) fetch(s string, ch chan<- string) {
124113
// Redirect to final url if page saved.
125114
final := resp.Request.URL.String()
126115
if len(final) > 0 && strings.Contains(final, "/submit/") == false {
127-
wbrc.final = final
116+
is.final = final
128117
}
129118
loc := resp.Header.Get("location")
130119
if len(loc) > 2 {
131-
wbrc.final = loc
120+
is.final = loc
132121
}
133122
// When use anyway parameter.
134123
refresh := resp.Header.Get("refresh")
135124
if len(refresh) > 0 {
136125
r := strings.Split(refresh, ";url=")
137126
if len(r) == 2 {
138-
wbrc.final = r[1]
127+
is.final = r[1]
139128
}
140129
}
141130

142-
ch <- wbrc.final
131+
ch <- is.final
143132
}
144133

145-
func (wbrc *Archiver) getCookie() string {
134+
func (is *IS) getCookie() string {
146135
c := os.Getenv("ARCHIVE_COOKIE")
147136
if c != "" {
148-
wbrc.Cookie = c
137+
is.wbrc.Cookie = c
149138
}
150139

151-
if wbrc.Cookie != "" {
152-
return wbrc.Cookie
140+
if is.wbrc.Cookie != "" {
141+
return is.wbrc.Cookie
153142
} else {
154143
return cookie
155144
}
156145
}
157146

158-
func (wbrc *Archiver) getSubmitID(url string) (string, error) {
147+
func (is *IS) getSubmitID(url string) (string, error) {
159148
if strings.Contains(url, "http") == false {
160149
return "", fmt.Errorf("missing protocol scheme")
161150
}
@@ -164,8 +153,8 @@ func (wbrc *Archiver) getSubmitID(url string) (string, error) {
164153
req, err := http.NewRequest("GET", url, r)
165154
req.Header.Add("Content-Type", "application/x-www-form-urlencoded")
166155
req.Header.Add("User-Agent", userAgent)
167-
req.Header.Add("Cookie", wbrc.getCookie())
168-
resp, err := wbrc.httpClient.Do(req)
156+
req.Header.Add("Cookie", is.getCookie())
157+
resp, err := is.httpClient.Do(req)
169158
if err != nil {
170159
return "", err
171160
}
@@ -187,9 +176,3 @@ func (wbrc *Archiver) getSubmitID(url string) (string, error) {
187176

188177
return id, nil
189178
}
190-
191-
func (wbrc *Archiver) clear() {
192-
baseuri = nil
193-
wbrc.final = ""
194-
wbrc.submitid = ""
195-
}

pkg/is.go

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,28 @@
11
package is
22

33
import (
4+
"context"
5+
"fmt"
46
"log"
7+
"net"
8+
"net/http"
59
"strings"
610
"sync"
711

812
"github.com/wabarc/helper"
913
)
1014

15+
type Archiver struct {
16+
Anyway string
17+
Cookie string
18+
19+
DialContext func(ctx context.Context, network, addr string) (net.Conn, error)
20+
SkipTLSVerification bool
21+
}
22+
1123
// Wayback is the handle of saving webpages to archive.is
1224
func (wbrc *Archiver) Wayback(links []string) (map[string]string, error) {
13-
collect := make(map[string]string)
25+
collect, results := make(map[string]string), make(map[string]string)
1426
for _, link := range links {
1527
if !helper.IsURL(link) {
1628
log.Print(link + " is invalid url.")
@@ -19,26 +31,40 @@ func (wbrc *Archiver) Wayback(links []string) (map[string]string, error) {
1931
collect[link] = link
2032
}
2133

22-
if client, tor, err := wbrc.newTorClient(); err != nil {
34+
torClient, tor, err := newTorClient()
35+
if err != nil {
2336
log.Println(err)
2437
} else {
25-
wbrc.torClient = client
2638
defer tor.Close()
2739
}
2840

2941
ch := make(chan string, len(collect))
3042
defer close(ch)
3143

44+
var mu sync.Mutex
3245
var wg sync.WaitGroup
3346
for link := range collect {
3447
wg.Add(1)
35-
go func(link string, ch chan string) {
36-
wbrc.fetch(link, ch)
37-
collect[link] = strings.Replace(<-ch, onion, "archive.today", 1)
48+
go func(link string) {
49+
is := &IS{
50+
wbrc: wbrc,
51+
httpClient: &http.Client{Timeout: timeout},
52+
torClient: torClient,
53+
final: "",
54+
submitid: "",
55+
}
56+
is.fetch(link, ch)
57+
mu.Lock()
58+
results[link] = strings.Replace(<-ch, onion, "archive.today", 1)
59+
mu.Unlock()
3860
wg.Done()
39-
}(link, ch)
61+
}(link)
4062
}
4163
wg.Wait()
4264

43-
return collect, nil
65+
if len(results) == 0 {
66+
return results, fmt.Errorf("No results")
67+
}
68+
69+
return results, nil
4470
}

pkg/is_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ func TestWayback(t *testing.T) {
1919
links = []string{"https://www.bbc.com/", "https://www.google.com/"}
2020
got, _ = wbrc.Wayback(links)
2121
if len(got) == 0 {
22-
t.Errorf("got = %d; want not equal 0", len(got))
22+
t.Errorf("got = %d; want greater than 0", len(got))
2323
}
2424

2525
for orig, dest := range got {

pkg/tor.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import (
1515
// "golang.org/x/net/proxy"
1616
)
1717

18-
func (arc *Archiver) newTorClient() (*http.Client, *tor.Tor, error) {
18+
func newTorClient() (*http.Client, *tor.Tor, error) {
1919
// Lookup tor executable file
2020
if _, err := exec.LookPath("tor"); err != nil {
2121
return nil, nil, fmt.Errorf("%w", err)

0 commit comments

Comments
 (0)