Skip to content

Commit db23675

Browse files
committed
cmd/urlcheck: use custom user-agent to appease Wikipedia's WAF
It appears that Wikipedia started blocking default user-agents as of 08/25/25 [1]. This correlates with the recent avalanche of failures of `TestNightlyLint`. The attempt to request `https://en.wikipedia.org/wiki/Cron` started to fail with 403:

```
HTTP/1.1 403 Forbidden
Connection: close
Content-Length: 92
Content-Type: text/plain
Server: HAProxy
X-Analytics:
X-Cache: cp1100 int
X-Cache-Status: int-tls
```

This change replaces Go's default UA with a custom one. According to Wikipedia's policy [2], we're now in compliance, which is confirmed by running locally and getting 200 instead of 403.

[1] https://phabricator.wikimedia.org/T400119
[2] https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy

Fixes: #152705
Fixes: #152706
Fixes: #152707
Fixes: #152708
Fixes: #152709
Fixes: #152710
Fixes: #152711
Fixes: #152712
Fixes: #152713

Epic: none
Release note: None
1 parent 65f538d commit db23675

File tree

1 file changed

+17
-2
lines changed

1 file changed

+17
-2
lines changed

pkg/cmd/urlcheck/lib/urlcheck/urlcheck.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ func chompUnbalanced(left, right rune, s string) string {
104104
}
105105

106106
func checkURL(client *http.Client, url string) error {
107-
resp, err := client.Head(url)
107+
resp, err := httpDo(client, "HEAD", url)
108108
if err != nil {
109109
return err
110110
}
@@ -121,7 +121,7 @@ func checkURL(client *http.Client, url string) error {
121121
// for any other error. Still, we link to several misconfigured servers that
122122
// return 403 Forbidden or 500 Internal Server Error for HEAD requests, but
123123
// not for GET requests.
124-
resp, err = client.Get(url)
124+
resp, err = httpDo(client, "GET", url)
125125
if err != nil {
126126
return err
127127
}
@@ -136,6 +136,21 @@ func checkURL(client *http.Client, url string) error {
136136
return errors.Newf("%s", errors.Safe(resp.Status))
137137
}
138138

139+
// httpDo issues a body-less HTTP request of the given requestType (e.g.
// "HEAD" or "GET") to url using client c, with a custom User-Agent header
// attached.
//
// N.B. we set a custom User-Agent header to avoid being blocked.
// E.g., as of 08/25/25, Wikipedia blocks default UAs [1]. Its User-Agent
// policy [2] asks clients to send a descriptive agent that identifies the
// tool and gives a way to reach its operator, so the header names this tool
// and links to its repository instead of using a generic placeholder.
//
// If the request cannot be constructed (e.g. an invalid method or URL), a nil
// response and the construction error are returned. Otherwise the result of
// c.Do is returned unchanged; on success the caller must close resp.Body.
//
// [1] https://phabricator.wikimedia.org/T400119
// [2] https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy
func httpDo(c *http.Client, requestType string, url string) (resp *http.Response, err error) {
	// Format: <client name>/<version> (<contact information>).
	// NOTE(review): the repository URL is inferred from this file's path;
	// confirm it is the preferred contact point.
	const userAgent = "cockroachdb-urlcheck/1.0 (https://github.com/cockroachdb/cockroach)"

	req, err := http.NewRequest(requestType, url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", userAgent)

	return c.Do(req)
}
153+
139154
func checkURLWithRetries(client *http.Client, url string) error {
140155
for i := 0; i < timeoutRetries; i++ {
141156
err := checkURL(client, url)

0 commit comments

Comments
 (0)