@@ -8,9 +8,11 @@ import { MarkdownString } from '../../../../../base/common/htmlContent.js';
8
8
import { ResourceSet } from '../../../../../base/common/map.js' ;
9
9
import { URI } from '../../../../../base/common/uri.js' ;
10
10
import { localize } from '../../../../../nls.js' ;
11
+ import { IFileService } from '../../../../../platform/files/common/files.js' ;
11
12
import { IWebContentExtractorService } from '../../../../../platform/webContentExtractor/common/webContentExtractor.js' ;
12
13
import { CountTokensCallback , IPreparedToolInvocation , IToolData , IToolImpl , IToolInvocation , IToolInvocationPreparationContext , IToolResult , IToolResultTextPart , ToolDataSource , ToolProgress } from '../../common/languageModelToolsService.js' ;
13
14
import { InternalFetchWebPageToolId } from '../../common/tools/tools.js' ;
15
+ import { detectEncodingFromBuffer } from '../../../../services/textfile/common/encoding.js' ;
14
16
15
17
export const FetchWebPageToolData : IToolData = {
16
18
id : InternalFetchWebPageToolId ,
@@ -38,78 +40,125 @@ export class FetchWebPageTool implements IToolImpl {
38
40
39
41
/**
 * @param _readerModeService Used by this tool to extract content for `http`/`https` URIs.
 * @param _fileService Used by this tool to `stat` and read every other URI scheme.
 */
constructor(
	@IWebContentExtractorService private readonly _readerModeService: IWebContentExtractorService,
	@IFileService private readonly _fileService: IFileService,
) { }
42
45
43
/**
 * Fetches the content of every URL in `invocation.parameters.urls`.
 *
 * `http`/`https` URLs go through the web content extractor; every other
 * scheme is read through the file service. The returned content has one entry
 * per input URL, in input order. URLs that could not be parsed or read are
 * passed to `_getPromptPartsForResults` as `undefined`.
 *
 * @param invocation Tool invocation whose parameters carry the `urls` array.
 * @param _countTokens Unused.
 * @param _progress Unused.
 * @param token Cancellation token forwarded to file reads.
 * @returns The prompt parts for all URLs; `toolResultDetails` lists the fetched
 * URIs when there is more than one.
 */
async invoke(invocation: IToolInvocation, _countTokens: CountTokensCallback, _progress: ToolProgress, token: CancellationToken): Promise<IToolResult> {
	const urls = (invocation.parameters as { urls?: string[] }).urls ?? [];
	const { webUris, fileUris, invalidUris } = this._parseUris(urls);

	if (webUris.size === 0 && fileUris.size === 0 && invalidUris.size === 0) {
		return {
			content: [{ kind: 'text', value: localize('fetchWebPage.noValidUrls', 'No valid URLs provided.') }]
		};
	}

	// We approved these via confirmation, so mark them as "approved" in this session
	// if they are not approved via the trusted domain service.
	for (const uri of webUris.values()) {
		this._alreadyApprovedDomains.add(uri);
	}

	// Content is keyed by the original URL string rather than tracked with running
	// indexes: the parsed maps are de-duplicated, so a URL repeated in the input
	// would otherwise shift every later result onto the wrong content.
	const contentByUrl = new Map<string, string | undefined>();

	// Get contents from web URIs
	const webEntries = [...webUris.entries()];
	if (webEntries.length > 0) {
		const webContents = await this._readerModeService.extract(webEntries.map(([, uri]) => uri));
		webEntries.forEach(([url], index) => contentByUrl.set(url, webContents[index]));
	}

	// Get contents from file URIs
	const successfulFileUris: URI[] = [];
	for (const [url, uri] of fileUris.entries()) {
		try {
			const fileContent = await this._fileService.readFile(uri, undefined, token);

			// Check if the content is binary
			const detected = detectEncodingFromBuffer({ buffer: fileContent.value, bytesRead: fileContent.value.byteLength });

			if (detected.seemsBinary) {
				// For binary files, return a message indicating they're not supported
				// We do this for now until the tools that leverage this internal tool can support binary content
				contentByUrl.set(url, localize('fetchWebPage.binaryNotSupported', 'Binary files are not supported at the moment.'));
			} else {
				// For text files, convert to string
				contentByUrl.set(url, fileContent.value.toString());
			}

			successfulFileUris.push(uri);
		} catch {
			// If file service can't read it, treat as invalid
			contentByUrl.set(url, undefined);
		}
	}

	// Build results array in original order; URLs with no recorded content
	// (unparseable ones included) come out as undefined.
	const results = urls.map(url => contentByUrl.get(url));

	// Web URIs plus the file URIs that were actually read
	const actuallyValidUris = [...webUris.values(), ...successfulFileUris];

	return {
		content: this._getPromptPartsForResults(results),
		// Have multiple results show in the dropdown
		toolResultDetails: actuallyValidUris.length > 1 ? actuallyValidUris : undefined
	};
}
77
119
78
120
async prepareToolInvocation ( context : IToolInvocationPreparationContext , token : CancellationToken ) : Promise < IPreparedToolInvocation | undefined > {
79
- const map = this . _parseUris ( context . parameters . urls ) ;
80
- const invalid = new Array < string > ( ) ;
81
- const valid = new Array < URI > ( ) ;
82
- map . forEach ( ( uri , url ) => {
83
- if ( ! uri ) {
84
- invalid . push ( url ) ;
85
- } else {
86
- valid . push ( uri ) ;
121
+ const { webUris, fileUris, invalidUris } = this . _parseUris ( context . parameters . urls ) ;
122
+
123
+ // Check which file URIs can actually be read
124
+ const validFileUris : URI [ ] = [ ] ;
125
+ const additionalInvalidUrls : string [ ] = [ ] ;
126
+ for ( const [ originalUrl , uri ] of fileUris . entries ( ) ) {
127
+ try {
128
+ await this . _fileService . stat ( uri ) ;
129
+ validFileUris . push ( uri ) ;
130
+ } catch ( error ) {
131
+ // If file service can't stat it, treat as invalid
132
+ additionalInvalidUrls . push ( originalUrl ) ;
87
133
}
88
- } ) ;
89
- const urlsNeedingConfirmation = valid . filter ( url => ! this . _alreadyApprovedDomains . has ( url ) ) ;
134
+ }
135
+
136
+ const invalid = [ ...Array . from ( invalidUris ) , ...additionalInvalidUrls ] ;
137
+ const valid = [ ...webUris . values ( ) , ...validFileUris ] ;
138
+ const urlsNeedingConfirmation = webUris . size > 0 ? [ ...webUris . values ( ) ] . filter ( url => ! this . _alreadyApprovedDomains . has ( url ) ) : [ ] ;
90
139
91
140
const pastTenseMessage = invalid . length
92
141
? invalid . length > 1
93
142
// If there are multiple invalid URLs, show them all
94
143
? new MarkdownString (
95
144
localize (
96
145
'fetchWebPage.pastTenseMessage.plural' ,
97
- 'Fetched {0} web pages , but the following were invalid URLs:\n\n{1}\n\n' , valid . length , invalid . map ( url => `- ${ url } ` ) . join ( '\n' )
146
+ 'Fetched {0} resources , but the following were invalid URLs:\n\n{1}\n\n' , valid . length , invalid . map ( url => `- ${ url } ` ) . join ( '\n' )
98
147
) )
99
148
// If there is only one invalid URL, show it
100
149
: new MarkdownString (
101
150
localize (
102
151
'fetchWebPage.pastTenseMessage.singular' ,
103
- 'Fetched web page , but the following was an invalid URL:\n\n{0}\n\n' , invalid [ 0 ]
152
+ 'Fetched resource , but the following was an invalid URL:\n\n{0}\n\n' , invalid [ 0 ]
104
153
) )
105
154
// No invalid URLs
106
155
: new MarkdownString ( ) ;
107
156
108
157
const invocationMessage = new MarkdownString ( ) ;
109
158
if ( valid . length > 1 ) {
110
- pastTenseMessage . appendMarkdown ( localize ( 'fetchWebPage.pastTenseMessageResult.plural' , 'Fetched {0} web pages ' , valid . length ) ) ;
111
- invocationMessage . appendMarkdown ( localize ( 'fetchWebPage.invocationMessage.plural' , 'Fetching {0} web pages ' , valid . length ) ) ;
112
- } else {
159
+ pastTenseMessage . appendMarkdown ( localize ( 'fetchWebPage.pastTenseMessageResult.plural' , 'Fetched {0} resources ' , valid . length ) ) ;
160
+ invocationMessage . appendMarkdown ( localize ( 'fetchWebPage.invocationMessage.plural' , 'Fetching {0} resources ' , valid . length ) ) ;
161
+ } else if ( valid . length === 1 ) {
113
162
const url = valid [ 0 ] . toString ( ) ;
114
163
// If the URL is too long, show it as a link... otherwise, show it as plain text
115
164
if ( url . length > 400 ) {
@@ -119,14 +168,14 @@ export class FetchWebPageTool implements IToolImpl {
119
168
// Make sure the link syntax is correct
120
169
'{Locked="]({0})"}' ,
121
170
]
122
- } , 'Fetched [web page ]({0})' , url ) ) ;
171
+ } , 'Fetched [resource ]({0})' , url ) ) ;
123
172
invocationMessage . appendMarkdown ( localize ( {
124
173
key : 'fetchWebPage.invocationMessage.singularAsLink' ,
125
174
comment : [
126
175
// Make sure the link syntax is correct
127
176
'{Locked="]({0})"}' ,
128
177
]
129
- } , 'Fetching [web page ]({0})' , url ) ) ;
178
+ } , 'Fetching [resource ]({0})' , url ) ) ;
130
179
} else {
131
180
pastTenseMessage . appendMarkdown ( localize ( 'fetchWebPage.pastTenseMessageResult.singular' , 'Fetched {0}' , url ) ) ;
132
181
invocationMessage . appendMarkdown ( localize ( 'fetchWebPage.invocationMessage.singular' , 'Fetching {0}' , url ) ) ;
@@ -157,17 +206,26 @@ export class FetchWebPageTool implements IToolImpl {
157
206
return result ;
158
207
}
159
208
160
/**
 * Parses each URL string and sorts it into one of three buckets.
 *
 * @param urls Raw URL strings; `undefined` yields three empty collections.
 * @returns `webUris` holds `http`/`https` URIs and `fileUris` holds every other
 * scheme, both keyed by the original string (so repeated strings collapse into
 * one entry). `invalidUris` holds the strings for which `URI.parse` threw.
 */
private _parseUris(urls?: string[]): { webUris: Map<string, URI>; fileUris: Map<string, URI>; invalidUris: Set<string> } {
	const webUris = new Map<string, URI>();
	const fileUris = new Map<string, URI>();
	const invalidUris = new Set<string>();

	for (const url of urls ?? []) {
		let parsed: URI;
		try {
			parsed = URI.parse(url);
		} catch {
			invalidUris.add(url);
			continue;
		}

		const isWeb = parsed.scheme === 'http' || parsed.scheme === 'https';
		// Anything that is not http(s) is attempted via the file service
		(isWeb ? webUris : fileUris).set(url, parsed);
	}

	return { webUris, fileUris, invalidUris };
}
172
230
173
231
private _getPromptPartsForResults ( results : ( string | undefined ) [ ] ) : IToolResultTextPart [ ] {
0 commit comments