Skip to content

Commit f673cdd

Browse files
CopilotTylerLeonhardtconnor4312
authored
Fix fetch tool to fallback to file service for non-http schemes (microsoft#252683)
* Initial plan * Fix fetch tool to only handle HTTP/HTTPS schemes This prevents MCP resource URIs (like test://static/resource/50) from being incorrectly processed by the fetch tool. MCP resources should be handled by VS Code's filesystem providers instead. - Modified FetchWebPageTool._parseUris() to filter out non-HTTP(S) schemes - Added comprehensive tests for the new scheme filtering behavior - Non-HTTP(S) URIs will now be treated as invalid and skipped Co-authored-by: TylerLeonhardt <[email protected]> * Refactor tests to use TestWebContentExtractorService instead of testing private methods Co-authored-by: TylerLeonhardt <[email protected]> * Enhance fetch tool to serve file content via IFileService for non-HTTP schemes Co-authored-by: connor4312 <[email protected]> * Revert tool description to original text as requested Co-authored-by: TylerLeonhardt <[email protected]> * Implement FileService-based WebContentExtractorService for web platform Co-authored-by: TylerLeonhardt <[email protected]> * Handle binary data * style * don't adopt this yet --------- Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: TylerLeonhardt <[email protected]> Co-authored-by: connor4312 <[email protected]> Co-authored-by: Tyler Leonhardt <[email protected]>
1 parent a99ad9a commit f673cdd

File tree

2 files changed

+378
-41
lines changed

2 files changed

+378
-41
lines changed

src/vs/workbench/contrib/chat/electron-browser/tools/fetchPageTool.ts

Lines changed: 99 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@ import { MarkdownString } from '../../../../../base/common/htmlContent.js';
88
import { ResourceSet } from '../../../../../base/common/map.js';
99
import { URI } from '../../../../../base/common/uri.js';
1010
import { localize } from '../../../../../nls.js';
11+
import { IFileService } from '../../../../../platform/files/common/files.js';
1112
import { IWebContentExtractorService } from '../../../../../platform/webContentExtractor/common/webContentExtractor.js';
1213
import { CountTokensCallback, IPreparedToolInvocation, IToolData, IToolImpl, IToolInvocation, IToolInvocationPreparationContext, IToolResult, IToolResultTextPart, ToolDataSource, ToolProgress } from '../../common/languageModelToolsService.js';
1314
import { InternalFetchWebPageToolId } from '../../common/tools/tools.js';
15+
import { detectEncodingFromBuffer } from '../../../../services/textfile/common/encoding.js';
1416

1517
export const FetchWebPageToolData: IToolData = {
1618
id: InternalFetchWebPageToolId,
@@ -38,78 +40,125 @@ export class FetchWebPageTool implements IToolImpl {
3840

3941
constructor(
4042
@IWebContentExtractorService private readonly _readerModeService: IWebContentExtractorService,
43+
@IFileService private readonly _fileService: IFileService,
4144
) { }
4245

43-
async invoke(invocation: IToolInvocation, _countTokens: CountTokensCallback, _progress: ToolProgress, _token: CancellationToken): Promise<IToolResult> {
44-
const parsedUriResults = this._parseUris((invocation.parameters as { urls?: string[] }).urls);
45-
const validUris = Array.from(parsedUriResults.values()).filter((uri): uri is URI => !!uri);
46-
if (!validUris.length) {
46+
async invoke(invocation: IToolInvocation, _countTokens: CountTokensCallback, _progress: ToolProgress, token: CancellationToken): Promise<IToolResult> {
47+
const urls = (invocation.parameters as { urls?: string[] }).urls || [];
48+
const { webUris, fileUris, invalidUris } = this._parseUris(urls);
49+
const allValidUris = [...webUris.values(), ...fileUris.values()];
50+
51+
if (!allValidUris.length && invalidUris.size === 0) {
4752
return {
4853
content: [{ kind: 'text', value: localize('fetchWebPage.noValidUrls', 'No valid URLs provided.') }]
4954
};
5055
}
5156

5257
// We approved these via confirmation, so mark them as "approved" in this session
5358
// if they are not approved via the trusted domain service.
54-
for (const uri of validUris) {
59+
for (const uri of webUris.values()) {
5560
this._alreadyApprovedDomains.add(uri);
5661
}
5762

58-
const contents = await this._readerModeService.extract(validUris);
59-
// Make an array that contains either the content or undefined for invalid URLs
60-
const contentsWithUndefined: (string | undefined)[] = [];
61-
let indexInContents = 0;
62-
parsedUriResults.forEach((uri) => {
63-
if (uri) {
64-
contentsWithUndefined.push(contents[indexInContents]);
65-
indexInContents++;
63+
// Get contents from web URIs
64+
const webContents = webUris.size > 0 ? await this._readerModeService.extract([...webUris.values()]) : [];
65+
66+
// Get contents from file URIs
67+
const fileContents: (string | undefined)[] = [];
68+
const successfulFileUris: URI[] = [];
69+
for (const uri of fileUris.values()) {
70+
try {
71+
const fileContent = await this._fileService.readFile(uri, undefined, token);
72+
73+
// Check if the content is binary
74+
const detected = detectEncodingFromBuffer({ buffer: fileContent.value, bytesRead: fileContent.value.byteLength });
75+
76+
if (detected.seemsBinary) {
77+
// For binary files, return a message indicating they're not supported
78+
// We do this for now until the tools that leverage this internal tool can support binary content
79+
fileContents.push(localize('fetchWebPage.binaryNotSupported', 'Binary files are not supported at the moment.'));
80+
} else {
81+
// For text files, convert to string
82+
fileContents.push(fileContent.value.toString());
83+
}
84+
85+
successfulFileUris.push(uri);
86+
} catch (error) {
87+
// If file service can't read it, treat as invalid
88+
fileContents.push(undefined);
89+
}
90+
}
91+
92+
// Build results array in original order
93+
const results: (string | undefined)[] = [];
94+
let webIndex = 0;
95+
let fileIndex = 0;
96+
for (const url of urls) {
97+
if (invalidUris.has(url)) {
98+
results.push(undefined);
99+
} else if (webUris.has(url)) {
100+
results.push(webContents[webIndex]);
101+
webIndex++;
102+
} else if (fileUris.has(url)) {
103+
results.push(fileContents[fileIndex]);
104+
fileIndex++;
66105
} else {
67-
contentsWithUndefined.push(undefined);
106+
results.push(undefined);
68107
}
69-
});
108+
}
109+
110+
// Only include URIs that actually had content successfully fetched
111+
const actuallyValidUris = [...webUris.values(), ...successfulFileUris];
70112

71113
return {
72-
content: this._getPromptPartsForResults(contentsWithUndefined),
114+
content: this._getPromptPartsForResults(results),
73115
// Have multiple results show in the dropdown
74-
toolResultDetails: validUris.length > 1 ? validUris : undefined
116+
toolResultDetails: actuallyValidUris.length > 1 ? actuallyValidUris : undefined
75117
};
76118
}
77119

78120
async prepareToolInvocation(context: IToolInvocationPreparationContext, token: CancellationToken): Promise<IPreparedToolInvocation | undefined> {
79-
const map = this._parseUris(context.parameters.urls);
80-
const invalid = new Array<string>();
81-
const valid = new Array<URI>();
82-
map.forEach((uri, url) => {
83-
if (!uri) {
84-
invalid.push(url);
85-
} else {
86-
valid.push(uri);
121+
const { webUris, fileUris, invalidUris } = this._parseUris(context.parameters.urls);
122+
123+
// Check which file URIs can actually be read
124+
const validFileUris: URI[] = [];
125+
const additionalInvalidUrls: string[] = [];
126+
for (const [originalUrl, uri] of fileUris.entries()) {
127+
try {
128+
await this._fileService.stat(uri);
129+
validFileUris.push(uri);
130+
} catch (error) {
131+
// If file service can't stat it, treat as invalid
132+
additionalInvalidUrls.push(originalUrl);
87133
}
88-
});
89-
const urlsNeedingConfirmation = valid.filter(url => !this._alreadyApprovedDomains.has(url));
134+
}
135+
136+
const invalid = [...Array.from(invalidUris), ...additionalInvalidUrls];
137+
const valid = [...webUris.values(), ...validFileUris];
138+
const urlsNeedingConfirmation = webUris.size > 0 ? [...webUris.values()].filter(url => !this._alreadyApprovedDomains.has(url)) : [];
90139

91140
const pastTenseMessage = invalid.length
92141
? invalid.length > 1
93142
// If there are multiple invalid URLs, show them all
94143
? new MarkdownString(
95144
localize(
96145
'fetchWebPage.pastTenseMessage.plural',
97-
'Fetched {0} web pages, but the following were invalid URLs:\n\n{1}\n\n', valid.length, invalid.map(url => `- ${url}`).join('\n')
146+
'Fetched {0} resources, but the following were invalid URLs:\n\n{1}\n\n', valid.length, invalid.map(url => `- ${url}`).join('\n')
98147
))
99148
// If there is only one invalid URL, show it
100149
: new MarkdownString(
101150
localize(
102151
'fetchWebPage.pastTenseMessage.singular',
103-
'Fetched web page, but the following was an invalid URL:\n\n{0}\n\n', invalid[0]
152+
'Fetched resource, but the following was an invalid URL:\n\n{0}\n\n', invalid[0]
104153
))
105154
// No invalid URLs
106155
: new MarkdownString();
107156

108157
const invocationMessage = new MarkdownString();
109158
if (valid.length > 1) {
110-
pastTenseMessage.appendMarkdown(localize('fetchWebPage.pastTenseMessageResult.plural', 'Fetched {0} web pages', valid.length));
111-
invocationMessage.appendMarkdown(localize('fetchWebPage.invocationMessage.plural', 'Fetching {0} web pages', valid.length));
112-
} else {
159+
pastTenseMessage.appendMarkdown(localize('fetchWebPage.pastTenseMessageResult.plural', 'Fetched {0} resources', valid.length));
160+
invocationMessage.appendMarkdown(localize('fetchWebPage.invocationMessage.plural', 'Fetching {0} resources', valid.length));
161+
} else if (valid.length === 1) {
113162
const url = valid[0].toString();
114163
// If the URL is too long, show it as a link... otherwise, show it as plain text
115164
if (url.length > 400) {
@@ -119,14 +168,14 @@ export class FetchWebPageTool implements IToolImpl {
119168
// Make sure the link syntax is correct
120169
'{Locked="]({0})"}',
121170
]
122-
}, 'Fetched [web page]({0})', url));
171+
}, 'Fetched [resource]({0})', url));
123172
invocationMessage.appendMarkdown(localize({
124173
key: 'fetchWebPage.invocationMessage.singularAsLink',
125174
comment: [
126175
// Make sure the link syntax is correct
127176
'{Locked="]({0})"}',
128177
]
129-
}, 'Fetching [web page]({0})', url));
178+
}, 'Fetching [resource]({0})', url));
130179
} else {
131180
pastTenseMessage.appendMarkdown(localize('fetchWebPage.pastTenseMessageResult.singular', 'Fetched {0}', url));
132181
invocationMessage.appendMarkdown(localize('fetchWebPage.invocationMessage.singular', 'Fetching {0}', url));
@@ -157,17 +206,26 @@ export class FetchWebPageTool implements IToolImpl {
157206
return result;
158207
}
159208

160-
private _parseUris(urls?: string[]): Map<string, URI | undefined> {
161-
const results = new Map<string, URI | undefined>();
162-
urls?.forEach(uri => {
209+
private _parseUris(urls?: string[]): { webUris: Map<string, URI>; fileUris: Map<string, URI>; invalidUris: Set<string> } {
210+
const webUris = new Map<string, URI>();
211+
const fileUris = new Map<string, URI>();
212+
const invalidUris = new Set<string>();
213+
214+
urls?.forEach(url => {
163215
try {
164-
const uriObj = URI.parse(uri);
165-
results.set(uri, uriObj);
216+
const uriObj = URI.parse(url);
217+
if (uriObj.scheme === 'http' || uriObj.scheme === 'https') {
218+
webUris.set(url, uriObj);
219+
} else {
220+
// Try to handle other schemes via file service
221+
fileUris.set(url, uriObj);
222+
}
166223
} catch (e) {
167-
results.set(uri, undefined);
224+
invalidUris.add(url);
168225
}
169226
});
170-
return results;
227+
228+
return { webUris, fileUris, invalidUris };
171229
}
172230

173231
private _getPromptPartsForResults(results: (string | undefined)[]): IToolResultTextPart[] {

0 commit comments

Comments
 (0)