-
Notifications
You must be signed in to change notification settings - Fork 2.6k
fix: improve handling of net::ERR_ABORTED errors in URL fetching #6635
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
676a7b7
9715fd0
d71860f
ecc9286
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -100,26 +100,46 @@ export class UrlContentFetcher { | |
| const errorMessage = serializedError.message || String(error) | ||
| const errorName = serializedError.name | ||
|
|
||
| // Only retry for timeout or network-related errors | ||
| const shouldRetry = | ||
| errorMessage.includes("timeout") || | ||
| errorMessage.includes("net::") || | ||
| errorMessage.includes("NetworkError") || | ||
| errorMessage.includes("ERR_") || | ||
| errorName === "TimeoutError" | ||
|
|
||
| if (shouldRetry) { | ||
| // If networkidle2 fails due to timeout/network issues, try with just domcontentloaded as fallback | ||
| console.warn( | ||
| `Failed to load ${url} with networkidle2, retrying with domcontentloaded only: ${errorMessage}`, | ||
| ) | ||
| await this.page.goto(url, { | ||
| timeout: URL_FETCH_FALLBACK_TIMEOUT, | ||
| waitUntil: ["domcontentloaded"], | ||
| }) | ||
| // Special handling for ERR_ABORTED | ||
| if (errorMessage.includes("net::ERR_ABORTED")) { | ||
| console.error(`Navigation to ${url} was aborted: ${errorMessage}`) | ||
| // For ERR_ABORTED, we'll try a more aggressive retry with just domcontentloaded | ||
| // and a shorter timeout to quickly determine if the page is accessible | ||
| try { | ||
| await this.page.goto(url, { | ||
|
||
| timeout: 10000, // 10 seconds for quick check | ||
|
||
| waitUntil: ["domcontentloaded"], | ||
| }) | ||
| } catch (retryError) { | ||
| // If retry also fails, throw a more descriptive error | ||
| const retrySerializedError = serializeError(retryError) | ||
| const retryErrorMessage = retrySerializedError.message || String(retryError) | ||
| throw new Error( | ||
|
||
| `Failed to fetch URL content: ${retryErrorMessage}. The request was aborted, which may indicate the URL is inaccessible or blocked.`, | ||
| ) | ||
| } | ||
| } else { | ||
| // For other errors, throw them as-is | ||
| throw error | ||
| // Only retry for timeout or network-related errors | ||
| const shouldRetry = | ||
| errorMessage.includes("timeout") || | ||
| errorMessage.includes("net::") || | ||
| errorMessage.includes("NetworkError") || | ||
| errorMessage.includes("ERR_") || | ||
| errorName === "TimeoutError" | ||
|
|
||
| if (shouldRetry) { | ||
| // If networkidle2 fails due to timeout/network issues, try with just domcontentloaded as fallback | ||
| console.warn( | ||
| `Failed to load ${url} with networkidle2, retrying with domcontentloaded only: ${errorMessage}`, | ||
| ) | ||
| await this.page.goto(url, { | ||
| timeout: URL_FETCH_FALLBACK_TIMEOUT, | ||
| waitUntil: ["domcontentloaded"], | ||
| }) | ||
| } else { | ||
| // For other errors, throw them as-is | ||
| throw error | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -273,6 +273,36 @@ describe("UrlContentFetcher", () => { | |
| await expect(urlContentFetcher.urlToMarkdown("https://example.com")).rejects.toThrow("Simple string error") | ||
| expect(mockPage.goto).toHaveBeenCalledTimes(1) | ||
| }) | ||
|
|
||
| it("should handle net::ERR_ABORTED with special retry logic", async () => { | ||
|
||
| const abortedError = new Error("net::ERR_ABORTED at https://example.com") | ||
| mockPage.goto.mockRejectedValueOnce(abortedError).mockResolvedValueOnce(undefined) | ||
|
|
||
| const result = await urlContentFetcher.urlToMarkdown("https://example.com") | ||
|
|
||
| expect(mockPage.goto).toHaveBeenCalledTimes(2) | ||
| expect(mockPage.goto).toHaveBeenNthCalledWith(1, "https://example.com", { | ||
| timeout: 30000, | ||
| waitUntil: ["domcontentloaded", "networkidle2"], | ||
| }) | ||
| expect(mockPage.goto).toHaveBeenNthCalledWith(2, "https://example.com", { | ||
| timeout: 10000, | ||
| waitUntil: ["domcontentloaded"], | ||
| }) | ||
| expect(result).toBe("# Test content") | ||
| }) | ||
|
|
||
| it("should throw descriptive error when ERR_ABORTED retry also fails", async () => { | ||
| const abortedError = new Error("net::ERR_ABORTED at https://example.com") | ||
| const retryError = new Error("net::ERR_CONNECTION_REFUSED") | ||
| mockPage.goto.mockRejectedValueOnce(abortedError).mockRejectedValueOnce(retryError) | ||
|
|
||
| await expect(urlContentFetcher.urlToMarkdown("https://example.com")).rejects.toThrow( | ||
| "Failed to fetch URL content: net::ERR_CONNECTION_REFUSED. The request was aborted, which may indicate the URL is inaccessible or blocked.", | ||
| ) | ||
|
|
||
| expect(mockPage.goto).toHaveBeenCalledTimes(2) | ||
| }) | ||
| }) | ||
|
|
||
| describe("closeBrowser", () => { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor: Using `console.error` here while line 133 uses `console.warn` for other retries.
Should we use consistent logging levels for similar retry operations?