Skip to content

Commit f8bb08c

Browse files
authored
Merge pull request #152795 from cockroachdb/blathers/backport-release-25.3-152764
release-25.3: cmd/urlcheck: use custom user-agent to appease Wikipedia's WAF
2 parents 04de51a + f6d3de5 commit f8bb08c

File tree

1 file changed

+17
-2
lines changed

1 file changed

+17
-2
lines changed

pkg/cmd/urlcheck/lib/urlcheck/urlcheck.go

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ func chompUnbalanced(left, right rune, s string) string {
104104
}
105105

106106
func checkURL(client *http.Client, url string) error {
107-
resp, err := client.Head(url)
107+
resp, err := httpDo(client, "HEAD", url)
108108
if err != nil {
109109
return err
110110
}
@@ -121,7 +121,7 @@ func checkURL(client *http.Client, url string) error {
121121
// for any other error. Still, we link to several misconfigured servers that
122122
// return 403 Forbidden or 500 Internal Server Error for HEAD requests, but
123123
// not for GET requests.
124-
resp, err = client.Get(url)
124+
resp, err = httpDo(client, "GET", url)
125125
if err != nil {
126126
return err
127127
}
@@ -136,6 +136,21 @@ func checkURL(client *http.Client, url string) error {
136136
return errors.Newf("%s", errors.Safe(resp.Status))
137137
}
138138

139+
// httpDo builds a body-less request of the given requestType (e.g. "HEAD" or
// "GET") for url, attaches a custom User-Agent header, and sends it through c.
// An error from constructing the request is returned as-is with a nil
// response; otherwise the result of c.Do is returned unchanged.
//
// N.B. the custom User-Agent is there to avoid being blocked. E.g., as of
// 08/25/25, Wikipedia blocks default UAs [1].
//
// [1] https://phabricator.wikimedia.org/T400119
func httpDo(c *http.Client, requestType string, url string) (resp *http.Response, err error) {
	var request *http.Request
	if request, err = http.NewRequest(requestType, url, nil); err != nil {
		return nil, err
	}

	// This UA seems to comply with Wikipedia's policy [1].
	//
	// [1] https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy
	request.Header.Set("User-Agent", "MyGoApplication/1.0 (https://example.com/myapp; [email protected])")

	resp, err = c.Do(request)
	return resp, err
}
153+
139154
func checkURLWithRetries(client *http.Client, url string) error {
140155
for i := 0; i < timeoutRetries; i++ {
141156
err := checkURL(client, url)

0 commit comments

Comments
 (0)