Skip to content
This repository was archived by the owner on May 20, 2025. It is now read-only.

Commit 4226028

Browse files
committed
Improves broken link checker resilience
Enhances the broken link checker by adding retry logic for rate limiting (429) and timeout (408) errors, using exponential backoff. Introduces a mechanism to accept specific rate-limited URLs. Processes links in batches to avoid overwhelming the system. Skips checking empty or already visited URLs.
1 parent 18518d8 commit 4226028

File tree

1 file changed

+137
-49
lines changed

1 file changed

+137
-49
lines changed

cypress/e2e/broken-links.cy.ts

Lines changed: 137 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ const REDIRECT_CODES = [301, 302, 304, 307, 308]
77
// other non standard codes, like 999 from linkedin
88
const OTHER_CODES = [999]
99

10+
// URL prefixes for which a 429 (rate limited) response is accepted; entries are matched with startsWith
11+
const ACCEPTED_RATE_LIMITED_URLS = [
12+
'https://github.com/nitrictech/nitric',
13+
// Add more URLs here as needed
14+
]
15+
1016
const IGNORED_URLS = [
1117
'googleads.g.doubleclick.net',
1218
'youtube.com/api',
@@ -45,7 +51,18 @@ const isExternalUrl = (url: string) => {
4551
return !url.includes('localhost')
4652
}
4753

48-
const req = (url: string, retryCount = 0, followRedirect = false): any => {
54+
/**
 * Reports whether `url` begins with one of the entries listed in
 * ACCEPTED_RATE_LIMITED_URLS. The comparison is a plain prefix match,
 * so any URL that extends a listed entry also qualifies.
 */
const isAcceptedRateLimitedUrl = (url: string) => {
  for (const acceptedPrefix of ACCEPTED_RATE_LIMITED_URLS) {
    if (url.startsWith(acceptedPrefix)) {
      return true
    }
  }
  return false
}
59+
60+
const req = (
61+
url: string,
62+
retryCount = 0,
63+
followRedirect = false,
64+
visitedLinks: Record<string, boolean> = {},
65+
): any => {
4966
return cy
5067
.request({
5168
url,
@@ -54,11 +71,34 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
5471
gzip: false,
5572
})
5673
.then((resp) => {
57-
// retry on timeout and too many requests
58-
if ([408, 429].includes(resp.status) && retryCount < 3) {
59-
cy.log(`request ${url} timed out, retrying again...`)
60-
cy.wait(500)
61-
return req(url, retryCount + 1)
74+
// Handle rate limiting (429): wait for the Retry-After header value when present, otherwise use capped exponential backoff
75+
if (resp.status === 429 && retryCount < 5) {
76+
const retryAfter = resp.headers['retry-after']
77+
? parseInt(
78+
Array.isArray(resp.headers['retry-after'])
79+
? resp.headers['retry-after'][0]
80+
: resp.headers['retry-after'],
81+
)
82+
: null
83+
const waitTime = retryAfter
84+
? retryAfter * 1000
85+
: Math.min(500 * Math.pow(2, retryCount), 5000)
86+
87+
cy.log(
88+
`Rate limited for ${url}, waiting ${waitTime}ms before retry ${retryCount + 1}/5`,
89+
)
90+
cy.wait(waitTime)
91+
return req(url, retryCount + 1, followRedirect, visitedLinks)
92+
}
93+
94+
// Handle timeouts with exponential backoff
95+
if (resp.status === 408 && retryCount < 3) {
96+
const waitTime = Math.min(200 * Math.pow(2, retryCount), 2000)
97+
cy.log(
98+
`Request timeout for ${url}, waiting ${waitTime}ms before retry ${retryCount + 1}/3`,
99+
)
100+
cy.wait(waitTime)
101+
return req(url, retryCount + 1, followRedirect, visitedLinks)
62102
}
63103

64104
return resp
@@ -67,6 +107,7 @@ const req = (url: string, retryCount = 0, followRedirect = false): any => {
67107

68108
describe('Broken links test suite', () => {
69109
const VISITED_SUCCESSFUL_LINKS = {}
110+
const BATCH_SIZE = 10 // Process links in batches of 10
70111

71112
pages.forEach((page) => {
72113
it(`Should visit page ${page} and check all links`, () => {
@@ -84,61 +125,108 @@ describe('Broken links test suite', () => {
84125
(l) => href?.includes(l) || src?.includes(l),
85126
)
86127
})
87-
.each((link) => {
88-
cy.log(`link: ${link[0].textContent}`)
89-
const baseUrl = link.prop('href') || link.prop('src')
90-
91-
const url = baseUrl.split('#')[0]
92-
93-
if (VISITED_SUCCESSFUL_LINKS[url]) {
94-
cy.log(`link already checked`)
95-
expect(VISITED_SUCCESSFUL_LINKS[url]).to.be.true
96-
} else {
97-
// if the link is internal then check the link against the pages fixture (sitemap)
98-
if (isInternalUrl(url)) {
99-
// clean the url by removing the base url and query params
100-
const rootBaseUrlRegex = new RegExp(`^${rootBaseUrl}`)
101-
let cleanUrl = url.replace(rootBaseUrlRegex, '')
102-
const queryIndex = cleanUrl.indexOf('?')
103-
cleanUrl =
104-
queryIndex !== -1 ? cleanUrl.slice(0, queryIndex) : cleanUrl
105-
106-
cy.log(`checking internal link: ${cleanUrl}`)
107-
if (!pages.includes(cleanUrl)) {
108-
assert.fail(`${cleanUrl} is not part of the pages fixture`)
109-
} else {
110-
VISITED_SUCCESSFUL_LINKS[url] = true
111-
}
128+
.then(($links) => {
129+
const linkPromises = []
130+
const linksToCheck = []
131+
132+
$links.each((_i, link) => {
133+
const baseUrl =
134+
link.getAttribute('href') || link.getAttribute('src')
135+
if (!baseUrl) {
136+
cy.log('Skipping link with no href/src:', link)
137+
return
138+
}
112139

140+
// Skip if the URL is just a hash fragment
141+
if (baseUrl.startsWith('#')) {
142+
cy.log('Skipping hash fragment:', baseUrl)
113143
return
114144
}
115145

116-
cy.wait(25)
117-
118-
req(url).then((res: Cypress.Response<any>) => {
119-
let acceptableCodes = CORRECT_CODES
120-
if (REDIRECT_CODES.includes(res.status) && !isExternalUrl(url)) {
121-
assert.fail(
122-
`${url} returned ${res.status} to ${res.headers['location']}`,
123-
)
124-
} else {
125-
acceptableCodes = [
126-
...CORRECT_CODES,
127-
...REDIRECT_CODES,
128-
...OTHER_CODES,
129-
]
146+
const url = baseUrl.split('#')[0]
147+
if (!url) {
148+
cy.log('Skipping empty URL from:', baseUrl)
149+
return
150+
}
151+
152+
if (VISITED_SUCCESSFUL_LINKS[url]) {
153+
cy.log(`Skipping already checked link: ${url}`)
154+
return
155+
}
156+
157+
linksToCheck.push(url)
158+
})
159+
160+
// Process links in batches
161+
for (let i = 0; i < linksToCheck.length; i += BATCH_SIZE) {
162+
const batch = linksToCheck.slice(i, i + BATCH_SIZE)
163+
const batchPromises = batch.map((url) => {
164+
if (!url) {
165+
cy.log('Skipping empty URL in batch')
166+
return Promise.resolve()
130167
}
131168

132-
if (acceptableCodes.includes(res.status)) {
169+
if (isInternalUrl(url)) {
170+
const rootBaseUrlRegex = new RegExp(`^${rootBaseUrl}`)
171+
let cleanUrl = url.replace(rootBaseUrlRegex, '')
172+
const queryIndex = cleanUrl.indexOf('?')
173+
cleanUrl =
174+
queryIndex !== -1 ? cleanUrl.slice(0, queryIndex) : cleanUrl
175+
176+
if (!pages.includes(cleanUrl)) {
177+
assert.fail(`${cleanUrl} is not part of the pages fixture`)
178+
}
133179
VISITED_SUCCESSFUL_LINKS[url] = true
180+
return Promise.resolve()
134181
}
135182

136-
expect(res.status).oneOf(
137-
acceptableCodes,
138-
`${url} returned ${res.status}`,
183+
return req(url, 0, false, VISITED_SUCCESSFUL_LINKS).then(
184+
(res: Cypress.Response<any>) => {
185+
let acceptableCodes = CORRECT_CODES
186+
if (
187+
REDIRECT_CODES.includes(res.status) &&
188+
!isExternalUrl(url)
189+
) {
190+
assert.fail(
191+
`${url} returned ${res.status} to ${res.headers['location']}`,
192+
)
193+
} else if (res.status === 429) {
194+
// After all retries, if we still get a 429, only mark as successful for accepted URLs
195+
if (isAcceptedRateLimitedUrl(url)) {
196+
cy.log(
197+
`Rate limited for accepted URL ${url} after all retries, marking as successful`,
198+
)
199+
VISITED_SUCCESSFUL_LINKS[url] = true
200+
return
201+
} else {
202+
assert.fail(
203+
`${url} returned 429 (Rate Limited) and is not in the accepted list`,
204+
)
205+
}
206+
} else {
207+
acceptableCodes = [
208+
...CORRECT_CODES,
209+
...REDIRECT_CODES,
210+
...OTHER_CODES,
211+
]
212+
}
213+
214+
if (acceptableCodes.includes(res.status)) {
215+
VISITED_SUCCESSFUL_LINKS[url] = true
216+
}
217+
218+
expect(res.status).oneOf(
219+
acceptableCodes,
220+
`${url} returned ${res.status}`,
221+
)
222+
},
139223
)
140224
})
225+
226+
linkPromises.push(Promise.all(batchPromises))
141227
}
228+
229+
return Promise.all(linkPromises)
142230
})
143231
})
144232
})

0 commit comments

Comments
 (0)