Skip to content

Commit e002e61

Browse files
authored
Bugfix/Securely Fetch Links (#5200)
- Added `secureFetch` and `checkDenyList` functions from `httpSecurity` to enhance security in web crawling and link fetching processes.
- Updated the relevant functions to use these new security measures, ensuring safer data handling.
1 parent 4987a28 commit e002e61

File tree

3 files changed

+9
-3
lines changed

3 files changed

+9
-3
lines changed

packages/components/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ export * from '../evaluation/EvaluationRunner'
1313
export * from './followUpPrompts'
1414
export * from './validator'
1515
export * from './agentflowv2Generator'
16+
export * from './httpSecurity'

packages/components/src/utils.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import { TextSplitter } from 'langchain/text_splitter'
1818
import { DocumentLoader } from 'langchain/document_loaders/base'
1919
import { NodeVM } from '@flowiseai/nodevm'
2020
import { Sandbox } from '@e2b/code-interpreter'
21+
import { secureFetch, checkDenyList } from './httpSecurity'
2122
import JSON5 from 'json5'
2223

2324
export const numberOrExpressionRegex = '^(\\d+\\.?\\d*|{{.*}})$' //return true if string consists only numbers OR expression {{}}
@@ -422,7 +423,7 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
422423

423424
if (process.env.DEBUG === 'true') console.info(`actively crawling ${currentURL}`)
424425
try {
425-
const resp = await fetch(currentURL)
426+
const resp = await secureFetch(currentURL)
426427

427428
if (resp.status > 399) {
428429
if (process.env.DEBUG === 'true') console.error(`error in fetch with status code: ${resp.status}, on page: ${currentURL}`)
@@ -453,6 +454,8 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
453454
* @returns {Promise<string[]>}
454455
*/
455456
export async function webCrawl(stringURL: string, limit: number): Promise<string[]> {
457+
await checkDenyList(stringURL)
458+
456459
const URLObj = new URL(stringURL)
457460
const modifyURL = stringURL.slice(-1) === '/' ? stringURL.slice(0, -1) : stringURL
458461
return await crawl(URLObj.protocol + '//' + URLObj.hostname, modifyURL, [], limit)
@@ -476,7 +479,7 @@ export async function xmlScrape(currentURL: string, limit: number): Promise<stri
476479
let urls: string[] = []
477480
if (process.env.DEBUG === 'true') console.info(`actively scarping ${currentURL}`)
478481
try {
479-
const resp = await fetch(currentURL)
482+
const resp = await secureFetch(currentURL)
480483

481484
if (resp.status > 399) {
482485
if (process.env.DEBUG === 'true') console.error(`error in fetch with status code: ${resp.status}, on page: ${currentURL}`)

packages/server/src/services/fetch-links/index.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
import { webCrawl, xmlScrape } from 'flowise-components'
1+
import { webCrawl, xmlScrape, checkDenyList } from 'flowise-components'
22
import { StatusCodes } from 'http-status-codes'
33
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
44
import { getErrorMessage } from '../../errors/utils'
55

66
const getAllLinks = async (requestUrl: string, relativeLinksMethod: string, queryLimit: string): Promise<any> => {
77
try {
88
const url = decodeURIComponent(requestUrl)
9+
await checkDenyList(url)
10+
911
if (!relativeLinksMethod) {
1012
throw new InternalFlowiseError(
1113
StatusCodes.INTERNAL_SERVER_ERROR,

0 commit comments

Comments (0)