Skip to content

Commit d71860f

Browse files
committed
refactor: address review comments for ERR_ABORTED handling
- Extract retry logic into reusable helper method retryNavigation() - Add URL_FETCH_ABORTED_RETRY_TIMEOUT constant (10 seconds) - Change console.error to console.warn for consistency - Preserve specific error types in retry error messages - Add test case for successful retry after ERR_ABORTED
1 parent 9715fd0 commit d71860f

File tree

2 files changed

+53
-16
lines changed

2 files changed

+53
-16
lines changed

src/services/browser/UrlContentFetcher.ts

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import { serializeError } from "serialize-error"
1212
// Timeout constants
1313
const URL_FETCH_TIMEOUT = 30_000 // 30 seconds
1414
const URL_FETCH_FALLBACK_TIMEOUT = 20_000 // 20 seconds for fallback
15+
const URL_FETCH_ABORTED_RETRY_TIMEOUT = 10_000 // 10 seconds for ERR_ABORTED retry
1516

1617
interface PCRStats {
1718
puppeteer: { launch: typeof launch }
@@ -79,6 +80,20 @@ export class UrlContentFetcher {
7980
this.page = undefined
8081
}
8182

83+
/**
84+
* Helper method to retry navigation with different parameters
85+
*/
86+
private async retryNavigation(
87+
url: string,
88+
timeout: number,
89+
waitUntil: ("load" | "domcontentloaded" | "networkidle0" | "networkidle2")[],
90+
): Promise<void> {
91+
if (!this.page) {
92+
throw new Error("Page not initialized")
93+
}
94+
await this.page.goto(url, { timeout, waitUntil })
95+
}
96+
8297
// must make sure to call launchBrowser before and closeBrowser after using this
8398
async urlToMarkdown(url: string): Promise<string> {
8499
if (!this.browser || !this.page) {
@@ -90,10 +105,7 @@ export class UrlContentFetcher {
90105
this should be sufficient for most doc sites
91106
*/
92107
try {
93-
await this.page.goto(url, {
94-
timeout: URL_FETCH_TIMEOUT,
95-
waitUntil: ["domcontentloaded", "networkidle2"],
96-
})
108+
await this.retryNavigation(url, URL_FETCH_TIMEOUT, ["domcontentloaded", "networkidle2"])
97109
} catch (error) {
98110
// Use serialize-error to safely extract error information
99111
const serializedError = serializeError(error)
@@ -102,20 +114,22 @@ export class UrlContentFetcher {
102114

103115
// Special handling for ERR_ABORTED
104116
if (errorMessage.includes("net::ERR_ABORTED")) {
105-
console.error(`Navigation to ${url} was aborted: ${errorMessage}`)
117+
console.warn(`Navigation to ${url} was aborted: ${errorMessage}`)
106118
// For ERR_ABORTED, we'll try a more aggressive retry with just domcontentloaded
107119
// and a shorter timeout to quickly determine if the page is accessible
108120
try {
109-
await this.page.goto(url, {
110-
timeout: 10000, // 10 seconds for quick check
111-
waitUntil: ["domcontentloaded"],
112-
})
121+
await this.retryNavigation(url, URL_FETCH_ABORTED_RETRY_TIMEOUT, ["domcontentloaded"])
113122
} catch (retryError) {
114-
// If retry also fails, throw a more descriptive error
123+
// If retry also fails, throw a more descriptive error while preserving the specific error type
115124
const retrySerializedError = serializeError(retryError)
116125
const retryErrorMessage = retrySerializedError.message || String(retryError)
126+
127+
// Extract the specific error type (e.g., ERR_CONNECTION_REFUSED, ERR_TIMEOUT)
128+
const errorTypeMatch = retryErrorMessage.match(/net::ERR_\w+|ERR_\w+/)
129+
const errorType = errorTypeMatch ? errorTypeMatch[0] : "Unknown error"
130+
117131
throw new Error(
118-
`Failed to fetch URL content: ${retryErrorMessage}. The request was aborted, which may indicate the URL is inaccessible or blocked.`,
132+
`Failed to fetch URL content: ${errorType} - ${retryErrorMessage}. The request was aborted, which may indicate the URL is inaccessible or blocked.`,
119133
)
120134
}
121135
} else {
@@ -132,10 +146,7 @@ export class UrlContentFetcher {
132146
console.warn(
133147
`Failed to load ${url} with networkidle2, retrying with domcontentloaded only: ${errorMessage}`,
134148
)
135-
await this.page.goto(url, {
136-
timeout: URL_FETCH_FALLBACK_TIMEOUT,
137-
waitUntil: ["domcontentloaded"],
138-
})
149+
await this.retryNavigation(url, URL_FETCH_FALLBACK_TIMEOUT, ["domcontentloaded"])
139150
} else {
140151
// For other errors, throw them as-is
141152
throw error

src/services/browser/__tests__/UrlContentFetcher.spec.ts

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,11 +298,37 @@ describe("UrlContentFetcher", () => {
298298
mockPage.goto.mockRejectedValueOnce(abortedError).mockRejectedValueOnce(retryError)
299299

300300
await expect(urlContentFetcher.urlToMarkdown("https://example.com")).rejects.toThrow(
301-
"Failed to fetch URL content: net::ERR_CONNECTION_REFUSED. The request was aborted, which may indicate the URL is inaccessible or blocked.",
301+
"Failed to fetch URL content: net::ERR_CONNECTION_REFUSED - net::ERR_CONNECTION_REFUSED. The request was aborted, which may indicate the URL is inaccessible or blocked.",
302302
)
303303

304304
expect(mockPage.goto).toHaveBeenCalledTimes(2)
305305
})
306+
307+
it("should succeed when ERR_ABORTED retry is successful", async () => {
308+
const abortedError = new Error("net::ERR_ABORTED at https://example.com")
309+
// First call fails with ERR_ABORTED, second call (retry) succeeds
310+
mockPage.goto.mockRejectedValueOnce(abortedError).mockResolvedValueOnce(undefined)
311+
312+
const result = await urlContentFetcher.urlToMarkdown("https://example.com")
313+
314+
// Should have called goto twice
315+
expect(mockPage.goto).toHaveBeenCalledTimes(2)
316+
317+
// First call with full wait conditions
318+
expect(mockPage.goto).toHaveBeenNthCalledWith(1, "https://example.com", {
319+
timeout: 30000,
320+
waitUntil: ["domcontentloaded", "networkidle2"],
321+
})
322+
323+
// Second call (retry) with reduced timeout and simpler wait condition
324+
expect(mockPage.goto).toHaveBeenNthCalledWith(2, "https://example.com", {
325+
timeout: 10000,
326+
waitUntil: ["domcontentloaded"],
327+
})
328+
329+
// Should return the markdown content successfully
330+
expect(result).toBe("# Test content")
331+
})
306332
})
307333

308334
describe("closeBrowser", () => {

0 commit comments

Comments
 (0)