Skip to content

Commit 6124a3d

Browse files
committed
add pagination and timeout
1 parent b52bbe9 commit 6124a3d

File tree

2 files changed

+82
-27
lines changed

2 files changed

+82
-27
lines changed

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@ Requires following secrets in your Github repository:
1414
WALLABAG_PASSWORD: Wallabag password
1515
```
1616

17+
Optional environment variables for timeout configuration:
18+
19+
```
20+
WALLABAG_API_TIMEOUT: Timeout for Wallabag API requests in seconds (default: 30)
21+
HTTP_CHECK_TIMEOUT: Timeout for HTTP link checking in seconds (default: 15)
22+
TLS_HANDSHAKE_TIMEOUT: Timeout for TLS handshake in seconds (default: 10)
23+
```
24+
1725
Refer to the [Wallabag Documentation](https://doc.wallabag.org/developer/api/oauth/) to create API credentials on your instance.
1826

1927
After a run, you can see the job output for results, or check the articles tagged `dead` in your Wallabag instance.
@@ -44,7 +52,7 @@ jobs:
4452
4553
## tips & tricks
4654
47-
* wallabag-checklinks is limited to 10.000 entries
55+
* wallabag-checklinks now uses pagination to fetch all entries without limit (previously limited to 10,000 entries)
4856
* your web links will be exposed if the GitHub repo is public, so be careful. Use a private repo or run wallabag-checklinks locally; see the [release page](https://github.com/eumel8/wallabag-checklinks/releases) for binaries.
4957
5058
## credits

main.go

Lines changed: 73 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"log"
77
"net/http"
88
"os"
9+
"strconv"
910
"time"
1011

1112
"github.com/go-resty/resty/v2"
@@ -24,6 +25,9 @@ type AuthResponse struct {
2425
}
2526

2627
type EntryList struct {
28+
Page int `json:"page"`
29+
Pages int `json:"pages"`
30+
Total int `json:"total"`
2731
Embedded struct {
2832
Items []struct {
2933
ID int `json:"id"`
@@ -51,6 +55,27 @@ func getEnvOrFail(key string) string {
5155
return value
5256
}
5357

58+
// getEnvOrDefault looks up the environment variable named key and returns
// its value. When the variable is unset or set to the empty string,
// defaultValue is returned instead.
func getEnvOrDefault(key string, defaultValue string) string {
	if v := os.Getenv(key); v != "" {
		return v
	}
	return defaultValue
}
65+
66+
// getEnvIntOrDefault reads the environment variable named key and parses it
// as a base-10 integer with strconv.Atoi.
//
// defaultValue is returned when the variable is unset or empty. It is also
// returned, after logging a warning, when the value cannot be parsed as an
// integer. Any value that parses is returned unchanged; no range validation
// is performed.
func getEnvIntOrDefault(key string, defaultValue int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return defaultValue
	}

	if parsed, err := strconv.Atoi(raw); err == nil {
		return parsed
	}

	// Unparseable input is not fatal: warn and fall back to the default.
	log.Printf("⚠️ Invalid value for %s, using default: %d", key, defaultValue)
	return defaultValue
}
78+
5479
func getAccessToken(client *resty.Client) (string, error) {
5580
resp, err := client.R().
5681
SetFormData(map[string]string{
@@ -77,41 +102,58 @@ func getAccessToken(client *resty.Client) (string, error) {
77102
}
78103

79104
func getEntries(client *resty.Client, token string) ([]Entry, error) {
80-
var entries EntryList
105+
var result []Entry
106+
page := 1
107+
perPage := 100 // Reasonable page size
108+
109+
for {
110+
var entries EntryList
111+
112+
_, err := client.R().
113+
SetHeader("Authorization", "Bearer "+token).
114+
SetQueryParams(map[string]string{
115+
"page": fmt.Sprintf("%d", page),
116+
"perPage": fmt.Sprintf("%d", perPage),
117+
}).
118+
SetResult(&entries).
119+
Get(wallabagURL + "/api/entries.json")
120+
121+
if err != nil {
122+
return nil, err
123+
}
81124

82-
_, err := client.R().
83-
SetHeader("Authorization", "Bearer "+token).
84-
SetQueryParams(map[string]string{
85-
"perPage": "10000", // Adjust for more
86-
}).
87-
SetResult(&entries).
88-
Get(wallabagURL + "/api/entries.json")
125+
// Process entries from this page
126+
for _, item := range entries.Embedded.Items {
127+
tags := []string{}
128+
for _, tag := range item.Tags {
129+
tags = append(tags, tag.Label)
130+
}
131+
result = append(result, Entry{
132+
ID: item.ID,
133+
URL: item.URL,
134+
Tags: tags,
135+
})
136+
}
89137

90-
if err != nil {
91-
return nil, err
92-
}
138+
// Log progress
139+
fmt.Printf("📥 Fetched page %d/%d (%d entries so far)\n", page, entries.Pages, len(result))
93140

94-
var result []Entry
95-
for _, item := range entries.Embedded.Items {
96-
tags := []string{}
97-
for _, tag := range item.Tags {
98-
tags = append(tags, tag.Label)
141+
// Check if we've fetched all pages
142+
if page >= entries.Pages {
143+
break
99144
}
100-
result = append(result, Entry{
101-
ID: item.ID,
102-
URL: item.URL,
103-
Tags: tags,
104-
})
145+
146+
page++
105147
}
106148

107149
return result, nil
108150
}
109151

110-
func checkURL(url string) int {
152+
func checkURL(url string, timeout int, tlsTimeout int) int {
111153
client := &http.Client{
112-
Timeout: 15 * time.Second,
154+
Timeout: time.Duration(timeout) * time.Second,
113155
Transport: &http.Transport{
114-
TLSHandshakeTimeout: 10 * time.Second,
156+
TLSHandshakeTimeout: time.Duration(tlsTimeout) * time.Second,
115157
IdleConnTimeout: 90 * time.Second,
116158
},
117159
}
@@ -215,8 +257,13 @@ func containsTag(tags []string, target string) bool {
215257
}
216258

217259
func main() {
260+
// Get configurable timeouts with defaults
261+
apiTimeout := getEnvIntOrDefault("WALLABAG_API_TIMEOUT", 30)
262+
httpTimeout := getEnvIntOrDefault("HTTP_CHECK_TIMEOUT", 15)
263+
tlsTimeout := getEnvIntOrDefault("TLS_HANDSHAKE_TIMEOUT", 10)
264+
218265
restyClient := resty.New().
219-
SetTimeout(30 * time.Second).
266+
SetTimeout(time.Duration(apiTimeout) * time.Second).
220267
SetRetryCount(3).
221268
SetRetryWaitTime(5 * time.Second)
222269

@@ -233,7 +280,7 @@ func main() {
233280
fmt.Printf("🔎 Checking %d URLs...\n\n", len(entries))
234281

235282
for _, entry := range entries {
236-
status := checkURL(entry.URL)
283+
status := checkURL(entry.URL, httpTimeout, tlsTimeout)
237284
hasDeadTag := containsTag(entry.Tags, "dead")
238285

239286
if status == 0 {

0 commit comments

Comments
 (0)