@@ -22,12 +22,17 @@ import {
2222 logWebFetchFallbackAttempt ,
2323 WebFetchFallbackAttemptEvent ,
2424} from '../telemetry/index.js' ;
25+ import { convert } from 'html-to-text' ;
2526
// Shared stub standing in for the Gemini client's generateContent method;
// tests in this file queue its return values per call.
2627const mockGenerateContent = vi . fn ( ) ;
// Mock factory that returns a fake client object exposing only generateContent,
// wired to the shared stub above.
2728const mockGetGeminiClient = vi . fn ( ( ) => ( {
2829 generateContent : mockGenerateContent ,
2930} ) ) ;
3031
// Replace html-to-text with a stub whose convert() prefixes its input with a
// "Converted:" marker, so tests can tell whether conversion was applied.
32+ vi . mock ( 'html-to-text' , ( ) => ( {
33+ convert : vi . fn ( ( text ) => `Converted: ${ text } ` ) ,
34+ } ) ) ;
35+
3136vi . mock ( '../telemetry/index.js' , ( ) => ( {
3237 logWebFetchFallbackAttempt : vi . fn ( ) ,
3338 WebFetchFallbackAttemptEvent : vi . fn ( ) ,
@@ -246,6 +251,116 @@ describe('WebFetchTool', () => {
246251 } ) ;
247252 } ) ;
248253
describe('execute (fallback)', () => {
  /**
   * Options these tests expect the tool to pass to html-to-text's `convert`:
   * word wrapping disabled, link hrefs ignored, and images skipped.
   */
  const expectedConvertOptions = {
    wordwrap: false,
    selectors: [
      { selector: 'a', options: { ignoreHref: true } },
      { selector: 'img', format: 'skip' },
    ],
  };

  /**
   * Stubs `fetchUtils.fetchWithTimeout` so it resolves with a successful
   * response carrying `body` as its text.
   *
   * @param body Text returned by the stubbed response's `text()`.
   * @param contentType Value for the `content-type` header; when omitted the
   *   response carries no headers at all.
   */
  const stubFetchResponse = (body: string, contentType?: string) => {
    const headers =
      contentType === undefined
        ? new Headers()
        : new Headers({ 'content-type': contentType });
    vi.spyOn(fetchUtils, 'fetchWithTimeout').mockResolvedValue({
      ok: true,
      headers,
      text: () => Promise.resolve(body),
    } as Response);
  };

  /**
   * Queues an echoing LLM response for the fallback call, then builds and
   * executes the tool against a fixed example prompt.
   *
   * @returns The result of `invocation.execute`.
   */
  const executeFallback = async () => {
    // Mock fallback LLM call to return the content passed to it, so the
    // assertions can inspect what text the tool sent to the model.
    mockGenerateContent.mockImplementationOnce(async (req) => ({
      candidates: [{ content: { parts: [{ text: req[0].parts[0].text }] } }],
    }));

    const tool = new WebFetchTool(mockConfig);
    const invocation = tool.build({ prompt: 'fetch https://example.com' });
    return invocation.execute(new AbortController().signal);
  };

  beforeEach(() => {
    // The html-to-text mock is module-level, so its call history would
    // otherwise leak between tests and break the `not.toHaveBeenCalled()`
    // assertions. mockClear() drops the history but keeps the implementation.
    vi.mocked(convert).mockClear();

    // Force fallback by mocking primary fetch to fail: the first
    // generateContent call resolves with an empty candidates list.
    vi.spyOn(fetchUtils, 'isPrivateIp').mockReturnValue(false);
    mockGenerateContent.mockResolvedValueOnce({
      candidates: [],
    });
  });

  it('should convert HTML content using html-to-text', async () => {
    const htmlContent = '<html><body><h1>Hello</h1></body></html>';
    stubFetchResponse(htmlContent, 'text/html; charset=utf-8');

    const result = await executeFallback();

    expect(convert).toHaveBeenCalledWith(htmlContent, expectedConvertOptions);
    expect(result.llmContent).toContain(`Converted: ${htmlContent}`);
  });

  it('should return raw text for JSON content', async () => {
    const jsonContent = '{"key": "value"}';
    stubFetchResponse(jsonContent, 'application/json');

    const result = await executeFallback();

    expect(convert).not.toHaveBeenCalled();
    expect(result.llmContent).toContain(jsonContent);
  });

  it('should return raw text for plain text content', async () => {
    const textContent = 'Just some text.';
    stubFetchResponse(textContent, 'text/plain');

    const result = await executeFallback();

    expect(convert).not.toHaveBeenCalled();
    expect(result.llmContent).toContain(textContent);
  });

  it('should treat content with no Content-Type header as HTML', async () => {
    const content = '<p>No header</p>';
    stubFetchResponse(content);

    const result = await executeFallback();

    expect(convert).toHaveBeenCalledWith(content, expectedConvertOptions);
    expect(result.llmContent).toContain(`Converted: ${content}`);
  });
});
363+
249364 describe ( 'shouldConfirmExecute' , ( ) => {
250365 it ( 'should return confirmation details with the correct prompt and parsed urls' , async ( ) => {
251366 const tool = new WebFetchTool ( mockConfig ) ;
0 commit comments