Skip to content

Commit 3259bee

Browse files
authored
Merge pull request #285102 from microsoft/dev/dmitriv/fetch-tool-hang
Add timeout for content extraction in fetch tool
2 parents 8b0dc7d + 9a4f9e2 commit 3259bee

File tree

2 files changed

+47
-5
lines changed

2 files changed

+47
-5
lines changed

src/vs/platform/webContentExtractor/electron-main/webPageLoader.ts

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import type { BeforeSendResponse, BrowserWindow, BrowserWindowConstructorOptions, Event, OnBeforeSendHeadersListenerDetails } from 'electron';
77
import { Queue, raceTimeout, TimeoutTimer } from '../../../base/common/async.js';
8+
import { CancellationTokenSource } from '../../../base/common/cancellation.js';
89
import { createSingleCallFunction } from '../../../base/common/functional.js';
910
import { Disposable, toDisposable } from '../../../base/common/lifecycle.js';
1011
import { URI } from '../../../base/common/uri.js';
@@ -27,6 +28,7 @@ export class WebPageLoader extends Disposable {
2728
private static readonly TIMEOUT = 30000; // 30 seconds
2829
private static readonly POST_LOAD_TIMEOUT = 5000; // 5 seconds - increased for dynamic content
2930
private static readonly FRAME_TIMEOUT = 500; // 0.5 seconds
31+
private static readonly EXTRACT_CONTENT_TIMEOUT = 2000; // 2 seconds
3032
private static readonly IDLE_DEBOUNCE_TIME = 500; // 0.5 seconds - wait after last network request
3133
private static readonly MIN_CONTENT_LENGTH = 100; // Minimum content length to consider extraction successful
3234

@@ -329,11 +331,23 @@ export class WebPageLoader extends Disposable {
329331
try {
330332
const title = this._window.webContents.getTitle();
331333

332-
let result = await this.extractAccessibilityTreeContent() ?? '';
333-
if (result.length < WebPageLoader.MIN_CONTENT_LENGTH) {
334-
this.trace(`Accessibility tree extraction yielded insufficient content, trying main DOM element extraction`);
335-
const domContent = await this.extractMainDomElementContent() ?? '';
336-
result = domContent.length > result.length ? domContent : result;
334+
let result = '';
335+
const cts = new CancellationTokenSource();
336+
try {
337+
await raceTimeout((async () => {
338+
if (!cts.token.isCancellationRequested) {
339+
result = await this.extractAccessibilityTreeContent() ?? '';
340+
}
341+
342+
if (!cts.token.isCancellationRequested && result.length < WebPageLoader.MIN_CONTENT_LENGTH) {
343+
this.trace(`Accessibility tree extraction yielded insufficient content, trying main DOM element extraction`);
344+
const domContent = await this.extractMainDomElementContent() ?? '';
345+
result = domContent.length > result.length ? domContent : result;
346+
}
347+
})(), WebPageLoader.EXTRACT_CONTENT_TIMEOUT);
348+
} finally {
349+
cts.cancel();
350+
cts.dispose();
337351
}
338352

339353
if (result.length === 0) {

src/vs/platform/webContentExtractor/test/electron-main/webPageLoader.test.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,34 @@ suite('WebPageLoader', () => {
815815
assert.ok(window.webContents.executeJavaScript.called);
816816
}));
817817

818+
test('returns error when accessibility tree extraction hangs', () => runWithFakedTimers({ useFakeTimers: true }, async () => {
819+
const uri = URI.parse('https://example.com/page');
820+
const loader = createWebPageLoader(uri);
821+
822+
window.webContents.debugger.sendCommand.callsFake((command: string) => {
823+
switch (command) {
824+
case 'Network.enable':
825+
return Promise.resolve();
826+
case 'Accessibility.getFullAXTree':
827+
// Return a promise that never resolves to simulate hanging
828+
return new Promise(() => { });
829+
default:
830+
assert.fail(`Unexpected command: ${command}`);
831+
}
832+
});
833+
834+
const loadPromise = loader.load();
835+
window.webContents.emit('did-start-loading');
836+
const result = await loadPromise;
837+
838+
assert.strictEqual(result.status, 'error');
839+
if (result.status === 'error') {
840+
assert.ok(result.error.includes('Failed to extract meaningful content'));
841+
}
842+
// Verify executeJavaScript was NOT called for DOM extraction
843+
assert.ok(!window.webContents.executeJavaScript.called);
844+
}));
845+
818846
test('returns error when both accessibility tree and DOM extraction yield no content', () => runWithFakedTimers({ useFakeTimers: true }, async () => {
819847
const uri = URI.parse('https://example.com/empty-page');
820848

0 commit comments

Comments
 (0)