Skip to content

Commit 71436fa

Browse files
authored
Phase 2: Replace got with fetch in retry/timeout files (#57053)
1 parent 5a757c8 commit 71436fa

File tree

6 files changed

+240
-60
lines changed

6 files changed

+240
-60
lines changed

src/archives/middleware/archived-enterprise-versions-assets.ts

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import got from 'got'
1+
import { fetchWithRetry } from '@/frame/lib/fetch-utils'
22
import type { Response, NextFunction } from 'express'
33

44
import patterns from '@/frame/lib/patterns'
@@ -85,11 +85,31 @@ export default async function archivedEnterpriseVersionsAssets(
8585

8686
const proxyPath = `https://github.github.com/docs-ghes-${requestedVersion}${assetPath}`
8787
try {
88-
const r = await got(proxyPath)
88+
const r = await fetchWithRetry(
89+
proxyPath,
90+
{},
91+
{
92+
retries: 0,
93+
throwHttpErrors: true,
94+
},
95+
)
96+
97+
const body = await r.arrayBuffer()
8998

9099
res.set('accept-ranges', 'bytes')
91-
res.set('content-type', r.headers['content-type'])
92-
res.set('content-length', r.headers['content-length'])
100+
const contentType = r.headers.get('content-type')
101+
if (contentType) {
102+
// Match got's behavior by adding charset=utf-8 to SVG files
103+
if (contentType === 'image/svg+xml') {
104+
res.set('content-type', `${contentType}; charset=utf-8`)
105+
} else {
106+
res.set('content-type', contentType)
107+
}
108+
}
109+
const contentLength = r.headers.get('content-length')
110+
if (contentLength) {
111+
res.set('content-length', contentLength)
112+
}
93113
res.set('x-is-archived', 'true')
94114
res.set('x-robots-tag', 'noindex')
95115

@@ -98,7 +118,7 @@ export default async function archivedEnterpriseVersionsAssets(
98118
archivedCacheControl(res)
99119
setFastlySurrogateKey(res, SURROGATE_ENUMS.MANUAL)
100120

101-
return res.send(r.body)
121+
return res.send(Buffer.from(body))
102122
} catch (err) {
103123
// Primarily for the developers working on tests that mock
104124
// requests. If you don't set up `nock` correctly, you might

src/archives/middleware/archived-enterprise-versions.ts

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import type { Response, NextFunction } from 'express'
2-
import got from 'got'
2+
import { fetchWithRetry } from '@/frame/lib/fetch-utils'
33

44
import statsd from '@/observability/lib/statsd'
55
import {
@@ -190,33 +190,38 @@ export default async function archivedEnterpriseVersions(
190190
}
191191
// Retrieve the page from the archived repo
192192
const doGet = () =>
193-
got(getProxyPath(req.path, requestedVersion), {
194-
throwHttpErrors: false,
195-
retry: retryConfiguration,
196-
timeout: timeoutConfiguration,
197-
})
193+
fetchWithRetry(
194+
getProxyPath(req.path, requestedVersion),
195+
{},
196+
{
197+
retries: retryConfiguration.limit,
198+
timeout: timeoutConfiguration.response,
199+
throwHttpErrors: false,
200+
},
201+
)
198202

199203
const statsdTags = [`version:${requestedVersion}`]
200204
const r = await statsd.asyncTimer(doGet, 'archive_enterprise_proxy', [
201205
...statsdTags,
202206
`path:${req.path}`,
203207
])()
204208

205-
if (r.statusCode === 200) {
209+
if (r.status === 200) {
210+
const body = await r.text()
206211
const [, withoutLanguagePath] = splitByLanguage(req.path)
207212
const isDeveloperPage = withoutLanguagePath?.startsWith(
208213
`/enterprise/${requestedVersion}/developer`,
209214
)
210215
res.set('x-robots-tag', 'noindex')
211216

212217
// make stubbed redirect files (which exist in versions <2.13) redirect with a 301
213-
const staticRedirect = r.body.match(patterns.staticRedirect)
218+
const staticRedirect = body.match(patterns.staticRedirect)
214219
if (staticRedirect) {
215220
cacheAggressively(res)
216221
return res.redirect(redirectCode, staticRedirect[1])
217222
}
218223

219-
res.set('content-type', r.headers['content-type'])
224+
res.set('content-type', r.headers.get('content-type') || '')
220225

221226
cacheAggressively(res)
222227

@@ -230,7 +235,7 @@ export default async function archivedEnterpriseVersions(
230235
// `x-host` is a custom header set by Fastly.
231236
// GLB automatically deletes the `x-forwarded-host` header.
232237
const host = req.get('x-host') || req.get('x-forwarded-host') || req.get('host')
233-
r.body = r.body
238+
let modifiedBody = body
234239
.replaceAll(
235240
`${OLD_AZURE_BLOB_ENTERPRISE_DIR}/${requestedVersion}/assets/cb-`,
236241
`${ENTERPRISE_GH_PAGES_URL_PREFIX}${requestedVersion}/assets/cb-`,
@@ -239,6 +244,8 @@ export default async function archivedEnterpriseVersions(
239244
`${OLD_AZURE_BLOB_ENTERPRISE_DIR}/${requestedVersion}/`,
240245
`${req.protocol}://${host}/enterprise-server@${requestedVersion}/`,
241246
)
247+
248+
return res.send(modifiedBody)
242249
}
243250

244251
// Releases 3.1 and lower were previously hosted in the
@@ -247,23 +254,42 @@ export default async function archivedEnterpriseVersions(
247254
// The image paths all need to be updated to reference the images in the
248255
// new archived enterprise repo's root assets directory.
249256
if (versionSatisfiesRange(requestedVersion, `<${firstReleaseStoredInBlobStorage}`)) {
250-
r.body = r.body.replaceAll(
257+
let modifiedBody = body.replaceAll(
251258
`${OLD_GITHUB_IMAGES_ENTERPRISE_DIR}/${requestedVersion}`,
252259
`${ENTERPRISE_GH_PAGES_URL_PREFIX}${requestedVersion}`,
253260
)
254261
if (versionSatisfiesRange(requestedVersion, '<=2.18') && isDeveloperPage) {
255-
r.body = r.body.replaceAll(
262+
modifiedBody = modifiedBody.replaceAll(
256263
`${OLD_DEVELOPER_SITE_CONTAINER}/${requestedVersion}`,
257264
`${ENTERPRISE_GH_PAGES_URL_PREFIX}${requestedVersion}/developer`,
258265
)
259266
// Update all hrefs to add /developer to the path
260-
r.body = r.body.replaceAll(
267+
modifiedBody = modifiedBody.replaceAll(
261268
`="/enterprise/${requestedVersion}`,
262269
`="/enterprise/${requestedVersion}/developer`,
263270
)
264271
// The changelog is the only thing remaining on developer.github.com
265-
r.body = r.body.replaceAll('href="/changes', 'href="https://developer.github.com/changes')
272+
modifiedBody = modifiedBody.replaceAll(
273+
'href="/changes',
274+
'href="https://developer.github.com/changes',
275+
)
266276
}
277+
278+
// Continue with remaining replacements
279+
modifiedBody = modifiedBody.replaceAll(
280+
/="(\.\.\/)*assets/g,
281+
`="${ENTERPRISE_GH_PAGES_URL_PREFIX}${requestedVersion}/assets`,
282+
)
283+
284+
// Fix broken hrefs on the 2.16 landing page
285+
if (requestedVersion === '2.16' && req.path === '/en/enterprise/2.16') {
286+
modifiedBody = modifiedBody.replaceAll('ref="/en/enterprise', 'ref="/en/enterprise/2.16')
287+
}
288+
289+
// Remove the search results container from the page
290+
modifiedBody = modifiedBody.replaceAll('<div id="search-results-container"></div>', '')
291+
292+
return res.send(modifiedBody)
267293
}
268294

269295
// In all releases, some assets were incorrectly scraped and contain
@@ -275,21 +301,21 @@ export default async function archivedEnterpriseVersions(
275301
// We want to update the URLs in the format
276302
// "../../../../../../assets/" to prefix the assets directory with the
277303
// new archived enterprise repo URL.
278-
r.body = r.body.replaceAll(
304+
let modifiedBody = body.replaceAll(
279305
/="(\.\.\/)*assets/g,
280306
`="${ENTERPRISE_GH_PAGES_URL_PREFIX}${requestedVersion}/assets`,
281307
)
282308

283309
// Fix broken hrefs on the 2.16 landing page
284310
if (requestedVersion === '2.16' && req.path === '/en/enterprise/2.16') {
285-
r.body = r.body.replaceAll('ref="/en/enterprise', 'ref="/en/enterprise/2.16')
311+
modifiedBody = modifiedBody.replaceAll('ref="/en/enterprise', 'ref="/en/enterprise/2.16')
286312
}
287313

288314
// Remove the search results container from the page, which removes a white
289315
// box that prevents clicking on page links
290-
r.body = r.body.replaceAll('<div id="search-results-container"></div>', '')
316+
modifiedBody = modifiedBody.replaceAll('<div id="search-results-container"></div>', '')
291317

292-
return res.send(r.body)
318+
return res.send(modifiedBody)
293319
}
294320
// In releases 2.13 - 2.17, we lost access to frontmatter redirects
295321
// during the archival process. This workaround finds potentially

src/frame/lib/fetch-utils.ts

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/**
2+
* Utility functions for fetch with retry and timeout functionality
3+
* to replace got library functionality
4+
*/
5+
6+
export interface FetchWithRetryOptions {
7+
retries?: number
8+
retryDelay?: number
9+
timeout?: number
10+
throwHttpErrors?: boolean
11+
}
12+
13+
/**
14+
* Default retry delay calculation matching got's behavior:
15+
* sleep = 1000 * Math.pow(2, retry - 1) + Math.random() * 100
16+
*/
17+
function calculateDefaultDelay(attempt: number): number {
18+
return 1000 * Math.pow(2, attempt - 1) + Math.random() * 100
19+
}
20+
21+
/**
22+
* Sleep for a given number of milliseconds
23+
*/
24+
function sleep(ms: number): Promise<void> {
25+
return new Promise((resolve) => setTimeout(resolve, ms))
26+
}
27+
28+
/**
29+
* Fetch with timeout support
30+
*/
31+
async function fetchWithTimeout(
32+
url: string | URL,
33+
init?: RequestInit,
34+
timeout?: number,
35+
): Promise<Response> {
36+
if (!timeout) {
37+
return fetch(url, init)
38+
}
39+
40+
const controller = new AbortController()
41+
const timeoutId = setTimeout(() => controller.abort(), timeout)
42+
43+
try {
44+
const response = await fetch(url, {
45+
...init,
46+
signal: controller.signal,
47+
})
48+
clearTimeout(timeoutId)
49+
return response
50+
} catch (error) {
51+
clearTimeout(timeoutId)
52+
if (error instanceof Error && error.name === 'AbortError') {
53+
throw new Error(`Request timed out after ${timeout}ms`)
54+
}
55+
throw error
56+
}
57+
}
58+
59+
/**
60+
* Fetch with retry logic matching got's behavior
61+
*/
62+
export async function fetchWithRetry(
63+
url: string | URL,
64+
init?: RequestInit,
65+
options: FetchWithRetryOptions = {},
66+
): Promise<Response> {
67+
const { retries = 0, timeout, throwHttpErrors = true } = options
68+
69+
let lastError: Error | null = null
70+
71+
for (let attempt = 0; attempt <= retries; attempt++) {
72+
try {
73+
const response = await fetchWithTimeout(url, init, timeout)
74+
75+
// Check if we should retry based on status code
76+
if (response.status >= 500 && attempt < retries) {
77+
lastError = new Error(`HTTP ${response.status}: ${response.statusText}`)
78+
const delay = calculateDefaultDelay(attempt + 1)
79+
await sleep(delay)
80+
continue
81+
}
82+
83+
// If throwHttpErrors is true and status indicates an error
84+
if (throwHttpErrors && !response.ok && response.status >= 400) {
85+
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
86+
}
87+
88+
return response
89+
} catch (error) {
90+
lastError = error instanceof Error ? error : new Error(String(error))
91+
92+
// Don't retry on the last attempt
93+
if (attempt === retries) {
94+
throw lastError
95+
}
96+
97+
// Don't retry on client errors (4xx) unless it's specific ones
98+
if (
99+
error instanceof Error &&
100+
error.message.includes('HTTP 4') &&
101+
!error.message.includes('HTTP 429')
102+
) {
103+
throw lastError
104+
}
105+
106+
// Calculate delay and wait before retry
107+
const delay = calculateDefaultDelay(attempt + 1)
108+
await sleep(delay)
109+
}
110+
}
111+
112+
throw lastError || new Error('Maximum retries exceeded')
113+
}

src/frame/lib/get-remote-json.js

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import path from 'path'
22
import fs from 'fs'
33
import crypto from 'crypto'
44

5-
import got from 'got'
5+
import { fetchWithRetry } from './fetch-utils'
66
import statsd from '@/observability/lib/statsd'
77

88
// The only reason this is exported is for the sake of the unit tests'
@@ -65,23 +65,36 @@ export default async function getRemoteJSON(url, config) {
6565
}
6666

6767
if (!foundOnDisk) {
68-
// got will, by default, follow redirects and it will throw if the ultimate
68+
// fetch will, by default, follow redirects and fetchWithRetry will throw if the ultimate
6969
// response is not a 2xx.
7070
// But it's possible that the page is a 200 OK but it's just not a JSON
7171
// page at all. Then we can't assume we can deserialize it.
72-
const res = await got(url, config)
73-
if (!res.headers['content-type'].startsWith('application/json')) {
74-
throw new Error(
75-
`Fetching '${url}' resulted in a non-JSON response (${res.headers['content-type']})`,
76-
)
72+
const retries = config?.retry?.limit || 0
73+
const timeout = config?.timeout?.response
74+
75+
const res = await fetchWithRetry(
76+
url,
77+
{},
78+
{
79+
retries,
80+
timeout,
81+
throwHttpErrors: true,
82+
},
83+
)
84+
85+
const contentType = res.headers.get('content-type')
86+
if (!contentType || !contentType.startsWith('application/json')) {
87+
throw new Error(`Fetching '${url}' resulted in a non-JSON response (${contentType})`)
7788
}
78-
cache.set(cacheKey, JSON.parse(res.body))
89+
90+
const body = await res.text()
91+
cache.set(cacheKey, JSON.parse(body))
7992

8093
// Only write to disk for testing and local review.
8194
// In production, we never write to disk. Only in-memory.
8295
if (!inProd) {
8396
fs.mkdirSync(path.dirname(onDisk), { recursive: true })
84-
fs.writeFileSync(onDisk, res.body, 'utf-8')
97+
fs.writeFileSync(onDisk, body, 'utf-8')
8598
}
8699
}
87100
}

0 commit comments

Comments
 (0)