@@ -18,6 +18,7 @@ import { TextSplitter } from 'langchain/text_splitter'
18
18
import { DocumentLoader } from 'langchain/document_loaders/base'
19
19
import { NodeVM } from '@flowiseai/nodevm'
20
20
import { Sandbox } from '@e2b/code-interpreter'
21
+ import { secureFetch , checkDenyList } from './httpSecurity'
21
22
import JSON5 from 'json5'
22
23
23
24
// Regex source, kept as a plain string rather than a RegExp object. Anchored with ^ and $, it
// accepts a whole value that is either an unsigned decimal number (digits, an optional '.',
// then optional digits: e.g. `12`, `12.` or `12.5`) or an expression wrapped in `{{ }}`.
export const numberOrExpressionRegex = '^(\\d+\\.?\\d*|{{.*}})$'
@@ -422,7 +423,7 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
422
423
423
424
if ( process . env . DEBUG === 'true' ) console . info ( `actively crawling ${ currentURL } ` )
424
425
try {
425
- const resp = await fetch ( currentURL )
426
+ const resp = await secureFetch ( currentURL )
426
427
427
428
if ( resp . status > 399 ) {
428
429
if ( process . env . DEBUG === 'true' ) console . error ( `error in fetch with status code: ${ resp . status } , on page: ${ currentURL } ` )
@@ -453,6 +454,8 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
453
454
* @returns {Promise<string[]> }
454
455
*/
455
456
export async function webCrawl ( stringURL : string , limit : number ) : Promise < string [ ] > {
457
+ await checkDenyList ( stringURL )
458
+
456
459
const URLObj = new URL ( stringURL )
457
460
const modifyURL = stringURL . slice ( - 1 ) === '/' ? stringURL . slice ( 0 , - 1 ) : stringURL
458
461
return await crawl ( URLObj . protocol + '//' + URLObj . hostname , modifyURL , [ ] , limit )
@@ -476,7 +479,7 @@ export async function xmlScrape(currentURL: string, limit: number): Promise<stri
476
479
let urls : string [ ] = [ ]
477
480
if ( process . env . DEBUG === 'true' ) console . info ( `actively scarping ${ currentURL } ` )
478
481
try {
479
- const resp = await fetch ( currentURL )
482
+ const resp = await secureFetch ( currentURL )
480
483
481
484
if ( resp . status > 399 ) {
482
485
if ( process . env . DEBUG === 'true' ) console . error ( `error in fetch with status code: ${ resp . status } , on page: ${ currentURL } ` )
0 commit comments