Skip to content
This repository was archived by the owner on May 20, 2025. It is now read-only.

Commit 66be0f1

Browse files
authored
ci: improve broken link checker resilience (#742)
Enhances the broken link checker by adding retry logic for rate limiting (429) and timeout (408) responses, using exponential backoff. Introduces an allow-list of URLs (`ACCEPTED_RATE_LIMITED_URLS`) that are treated as passing if they still return 429 after all retries. Processes links in batches to avoid overwhelming the system. Skips checking empty or already visited URLs.
1 parent 18518d8 commit 66be0f1

File tree

1 file changed

+155
-52
lines changed

1 file changed

+155
-52
lines changed

cypress/e2e/broken-links.cy.ts

Lines changed: 155 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ const REDIRECT_CODES = [301, 302, 304, 307, 308]
77
// other non standard codes, like 999 from linkedin
88
const OTHER_CODES = [999]
99

10+
// URLs that we accept 429s for.
// A link whose URL starts with one of these entries is treated as passing
// when it still answers 429 (Too Many Requests) after all retries.
const ACCEPTED_RATE_LIMITED_URLS = [
  'https://github.com/nitrictech/nitric',
  // Add more URLs here as needed
]
15+
1016
const IGNORED_URLS = [
1117
'googleads.g.doubleclick.net',
1218
'youtube.com/api',
@@ -36,16 +42,47 @@ const IGNORED_URLS = [
3642
const rootBaseUrl = Cypress.config('baseUrl')
3743

3844
/**
 * Tells whether a link points inside the site under test.
 *
 * A link is internal when it is an absolute URL that begins with
 * `rootBaseUrl`, or when it is a relative path starting with `./` or `../`.
 *
 * @param url - the link target to classify
 * @returns `true` for internal links, `false` otherwise
 */
const isInternalUrl = (url: string) => {
  const relativePrefixes = ['./', '../']

  return (
    url.startsWith(rootBaseUrl) ||
    relativePrefixes.some((prefix) => url.startsWith(prefix))
  )
}
49+
50+
/**
 * Reduces an internal link to the site-relative path that is compared
 * against the pages fixture.
 *
 * - Absolute URLs that begin with `rootBaseUrl` lose that prefix, and
 *   anything from the first `?` or `#` onwards is dropped so that links
 *   carrying query parameters still match their page entry.
 * - `./` and `../` links are resolved against the directory of
 *   `currentPage`; only the resulting pathname is returned.
 * - Any other value is returned unchanged.
 *
 * @param url - the link target to clean
 * @param currentPage - path of the page the link was found on; used as the
 *   base for resolving relative links
 * @returns the cleaned path, or `url` itself when no rule applies
 */
const getCleanInternalUrl = (url: string, currentPage: string) => {
  if (url.startsWith(rootBaseUrl)) {
    // Strip the base prefix, then cut off any query string or fragment
    return url.slice(rootBaseUrl.length).split(/[?#]/)[0]
  }

  // Handle relative paths
  if (url.startsWith('./') || url.startsWith('../')) {
    // Directory of the current page, including the trailing slash
    const currentDir = currentPage.substring(
      0,
      currentPage.lastIndexOf('/') + 1,
    )

    // `pathname` carries no origin, query string or fragment, so it needs
    // no further cleaning
    return new URL(url, `${rootBaseUrl}${currentDir}`).pathname
  }

  return url
}
4369

4470
/**
 * Tells whether a link targets a host other than the local site.
 *
 * The check is a plain substring test: any URL that does not contain
 * `localhost` is reported as external.
 *
 * @param url - the link target to classify
 * @returns `true` when `url` does not contain `localhost`
 */
const isExternalUrl = (url: string) => {
  return !url.includes('localhost')
}
4773

48-
const req = (url: string, retryCount = 0, followRedirect = false): any => {
74+
/**
 * Tells whether a URL is covered by `ACCEPTED_RATE_LIMITED_URLS`.
 *
 * Entries are matched as prefixes, so a URL qualifies when it starts with
 * any listed entry.
 *
 * @param url - the link target to look up
 * @returns `true` when at least one accepted entry is a prefix of `url`
 */
const isAcceptedRateLimitedUrl = (url: string) => {
  return ACCEPTED_RATE_LIMITED_URLS.some((acceptedUrl) =>
    url.startsWith(acceptedUrl),
  )
}
79+
80+
const req = (
81+
url: string,
82+
retryCount = 0,
83+
followRedirect = false,
84+
visitedLinks: Record<string, boolean> = {},
85+
): any => {
4986
return cy
5087
.request({
5188
url,
@@ -54,11 +91,34 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
5491
gzip: false,
5592
})
5693
.then((resp) => {
57-
// retry on timeout and too many requests
58-
if ([408, 429].includes(resp.status) && retryCount < 3) {
59-
cy.log(`request ${url} timed out, retrying again...`)
60-
cy.wait(500)
61-
return req(url, retryCount + 1)
94+
// Handle rate limiting (429) with exponential backoff
95+
if (resp.status === 429 && retryCount < 3) {
96+
const retryAfter = resp.headers['retry-after']
97+
? parseInt(
98+
Array.isArray(resp.headers['retry-after'])
99+
? resp.headers['retry-after'][0]
100+
: resp.headers['retry-after'],
101+
)
102+
: null
103+
const waitTime = retryAfter
104+
? retryAfter * 1000
105+
: Math.min(500 * Math.pow(2, retryCount), 5000)
106+
107+
cy.log(
108+
`Rate limited for ${url}, waiting ${waitTime}ms before retry ${retryCount + 1}/3`,
109+
)
110+
cy.wait(waitTime)
111+
return req(url, retryCount + 1, followRedirect, visitedLinks)
112+
}
113+
114+
// Handle timeouts with exponential backoff
115+
if (resp.status === 408 && retryCount < 3) {
116+
const waitTime = Math.min(200 * Math.pow(2, retryCount), 2000)
117+
cy.log(
118+
`Request timeout for ${url}, waiting ${waitTime}ms before retry ${retryCount + 1}/3`,
119+
)
120+
cy.wait(waitTime)
121+
return req(url, retryCount + 1, followRedirect, visitedLinks)
62122
}
63123

64124
return resp
@@ -67,6 +127,7 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
67127

68128
describe('Broken links test suite', () => {
69129
const VISITED_SUCCESSFUL_LINKS = {}
130+
const BATCH_SIZE = 10 // Process links in batches of 10
70131

71132
pages.forEach((page) => {
72133
it(`Should visit page ${page} and check all links`, () => {
@@ -84,61 +145,103 @@ describe('Broken links test suite', () => {
84145
(l) => href?.includes(l) || src?.includes(l),
85146
)
86147
})
87-
.each((link) => {
88-
cy.log(`link: ${link[0].textContent}`)
89-
const baseUrl = link.prop('href') || link.prop('src')
90-
91-
const url = baseUrl.split('#')[0]
92-
93-
if (VISITED_SUCCESSFUL_LINKS[url]) {
94-
cy.log(`link already checked`)
95-
expect(VISITED_SUCCESSFUL_LINKS[url]).to.be.true
96-
} else {
97-
// if the link is internal then check the link against the pages fixture (sitemap)
98-
if (isInternalUrl(url)) {
99-
// clean the url by removing the base url and query params
100-
const rootBaseUrlRegex = new RegExp(`^${rootBaseUrl}`)
101-
let cleanUrl = url.replace(rootBaseUrlRegex, '')
102-
const queryIndex = cleanUrl.indexOf('?')
103-
cleanUrl =
104-
queryIndex !== -1 ? cleanUrl.slice(0, queryIndex) : cleanUrl
105-
106-
cy.log(`checking internal link: ${cleanUrl}`)
107-
if (!pages.includes(cleanUrl)) {
108-
assert.fail(`${cleanUrl} is not part of the pages fixture`)
109-
} else {
110-
VISITED_SUCCESSFUL_LINKS[url] = true
111-
}
148+
.then(($links) => {
149+
const linkPromises = []
150+
const linksToCheck = []
112151

152+
$links.each((_i, link) => {
153+
const baseUrl =
154+
link.getAttribute('href') || link.getAttribute('src')
155+
if (!baseUrl) {
156+
cy.log('Skipping link with no href/src:', link)
113157
return
114158
}
115159

116-
cy.wait(25)
117-
118-
req(url).then((res: Cypress.Response<any>) => {
119-
let acceptableCodes = CORRECT_CODES
120-
if (REDIRECT_CODES.includes(res.status) && !isExternalUrl(url)) {
121-
assert.fail(
122-
`${url} returned ${res.status} to ${res.headers['location']}`,
123-
)
124-
} else {
125-
acceptableCodes = [
126-
...CORRECT_CODES,
127-
...REDIRECT_CODES,
128-
...OTHER_CODES,
129-
]
160+
// Skip if the URL is just a hash fragment
161+
if (baseUrl.startsWith('#')) {
162+
cy.log('Skipping hash fragment:', baseUrl)
163+
return
164+
}
165+
166+
const url = baseUrl.split('#')[0]
167+
if (!url) {
168+
cy.log('Skipping empty URL from:', baseUrl)
169+
return
170+
}
171+
172+
if (VISITED_SUCCESSFUL_LINKS[url]) {
173+
cy.log(`Skipping already checked link: ${url}`)
174+
return
175+
}
176+
177+
linksToCheck.push(url)
178+
})
179+
180+
// Process links in batches
181+
for (let i = 0; i < linksToCheck.length; i += BATCH_SIZE) {
182+
const batch = linksToCheck.slice(i, i + BATCH_SIZE)
183+
const batchPromises = batch.map((url) => {
184+
if (!url) {
185+
cy.log('Skipping empty URL in batch')
186+
return Promise.resolve()
130187
}
131188

132-
if (acceptableCodes.includes(res.status)) {
189+
if (isInternalUrl(url)) {
190+
const cleanUrl = getCleanInternalUrl(url, page)
191+
if (!pages.includes(cleanUrl)) {
192+
assert.fail(`${cleanUrl} is not part of the pages fixture`)
193+
}
133194
VISITED_SUCCESSFUL_LINKS[url] = true
195+
return Promise.resolve()
134196
}
135197

136-
expect(res.status).oneOf(
137-
acceptableCodes,
138-
`${url} returned ${res.status}`,
198+
return req(url, 0, false, VISITED_SUCCESSFUL_LINKS).then(
199+
(res: Cypress.Response<any>) => {
200+
let acceptableCodes = CORRECT_CODES
201+
if (
202+
REDIRECT_CODES.includes(res.status) &&
203+
!isExternalUrl(url)
204+
) {
205+
assert.fail(
206+
`${url} returned ${res.status} to ${res.headers['location']}`,
207+
)
208+
} else if (res.status === 429) {
209+
// After all retries, if we still get a 429, only mark as successful for accepted URLs
210+
if (isAcceptedRateLimitedUrl(url)) {
211+
cy.log(
212+
`Rate limited for accepted URL ${url} after all retries, marking as successful`,
213+
)
214+
VISITED_SUCCESSFUL_LINKS[url] = true
215+
return
216+
} else {
217+
assert.fail(
218+
`${url} returned 429 (Rate Limited) and is not in the accepted list`,
219+
)
220+
}
221+
} else {
222+
acceptableCodes = [
223+
...CORRECT_CODES,
224+
...REDIRECT_CODES,
225+
...OTHER_CODES,
226+
]
227+
}
228+
229+
if (acceptableCodes.includes(res.status)) {
230+
VISITED_SUCCESSFUL_LINKS[url] = true
231+
}
232+
233+
expect(res.status).oneOf(
234+
acceptableCodes,
235+
`${url} returned ${res.status}`,
236+
)
237+
},
139238
)
140239
})
240+
241+
linkPromises.push(Promise.all(batchPromises))
141242
}
243+
244+
return Promise.all(linkPromises)
142245
})
143246
})
144247
})

0 commit comments

Comments
 (0)