1
- import { omit } from 'lodash'
2
1
import { TextSplitter } from 'langchain/text_splitter'
3
- import { CSVLoader } from '@langchain/community/document_loaders/fs/csv '
4
- import { getFileFromStorage , handleEscapeCharacters } from '../../../src'
2
+ import { CSVLoader } from './CsvLoader '
3
+ import { getFileFromStorage , handleDocumentLoaderDocuments , handleDocumentLoaderMetadata , handleDocumentLoaderOutput } from '../../../src'
5
4
import { ICommonObject , IDocument , INode , INodeData , INodeOutputsValue , INodeParams } from '../../../src/Interface'
6
5
7
6
class Csv_DocumentLoaders implements INode {
@@ -19,7 +18,7 @@ class Csv_DocumentLoaders implements INode {
19
18
constructor ( ) {
20
19
this . label = 'Csv File'
21
20
this . name = 'csvFile'
22
- this . version = 2 .0
21
+ this . version = 3 .0
23
22
this . type = 'Document'
24
23
this . icon = 'csv.svg'
25
24
this . category = 'Document Loaders'
@@ -82,21 +81,11 @@ class Csv_DocumentLoaders implements INode {
82
81
]
83
82
}
84
83
85
- async init ( nodeData : INodeData , _ : string , options : ICommonObject ) : Promise < any > {
86
- const textSplitter = nodeData . inputs ?. textSplitter as TextSplitter
84
+ getFiles ( nodeData : INodeData ) {
87
85
const csvFileBase64 = nodeData . inputs ?. csvFile as string
88
- const columnName = nodeData . inputs ?. columnName as string
89
- const metadata = nodeData . inputs ?. metadata
90
- const output = nodeData . outputs ?. output as string
91
- const _omitMetadataKeys = nodeData . inputs ?. omitMetadataKeys as string
92
-
93
- let omitMetadataKeys : string [ ] = [ ]
94
- if ( _omitMetadataKeys ) {
95
- omitMetadataKeys = _omitMetadataKeys . split ( ',' ) . map ( ( key ) => key . trim ( ) )
96
- }
97
86
98
- let docs : IDocument [ ] = [ ]
99
87
let files : string [ ] = [ ]
88
+ let fromStorage : boolean = true
100
89
101
90
if ( csvFileBase64 . startsWith ( 'FILE-STORAGE::' ) ) {
102
91
const fileName = csvFileBase64 . replace ( 'FILE-STORAGE::' , '' )
@@ -105,86 +94,56 @@ class Csv_DocumentLoaders implements INode {
105
94
} else {
106
95
files = [ fileName ]
107
96
}
108
- const chatflowid = options . chatflowid
109
-
110
- for ( const file of files ) {
111
- if ( ! file ) continue
112
- const fileData = await getFileFromStorage ( file , chatflowid )
113
- const blob = new Blob ( [ fileData ] )
114
- const loader = new CSVLoader ( blob , columnName . trim ( ) . length === 0 ? undefined : columnName . trim ( ) )
115
-
116
- if ( textSplitter ) {
117
- docs = await loader . load ( )
118
- docs = await textSplitter . splitDocuments ( docs )
119
- } else {
120
- docs . push ( ...( await loader . load ( ) ) )
121
- }
122
- }
123
97
} else {
124
98
if ( csvFileBase64 . startsWith ( '[' ) && csvFileBase64 . endsWith ( ']' ) ) {
125
99
files = JSON . parse ( csvFileBase64 )
126
100
} else {
127
101
files = [ csvFileBase64 ]
128
102
}
129
103
130
- for ( const file of files ) {
131
- if ( ! file ) continue
132
- const splitDataURI = file . split ( ',' )
133
- splitDataURI . pop ( )
134
- const bf = Buffer . from ( splitDataURI . pop ( ) || '' , 'base64' )
135
- const blob = new Blob ( [ bf ] )
136
- const loader = new CSVLoader ( blob , columnName . trim ( ) . length === 0 ? undefined : columnName . trim ( ) )
137
-
138
- if ( textSplitter ) {
139
- docs = await loader . load ( )
140
- docs = await textSplitter . splitDocuments ( docs )
141
- } else {
142
- docs . push ( ...( await loader . load ( ) ) )
143
- }
144
- }
104
+ fromStorage = false
145
105
}
146
106
147
- if ( metadata ) {
148
- const parsedMetadata = typeof metadata === 'object' ? metadata : JSON . parse ( metadata )
149
- docs = docs . map ( ( doc ) => ( {
150
- ...doc ,
151
- metadata :
152
- _omitMetadataKeys === '*'
153
- ? {
154
- ...parsedMetadata
155
- }
156
- : omit (
157
- {
158
- ...doc . metadata ,
159
- ...parsedMetadata
160
- } ,
161
- omitMetadataKeys
162
- )
163
- } ) )
107
+ return { files, fromStorage }
108
+ }
109
+
110
/**
 * Resolves the raw contents of one CSV entry.
 *
 * @param file - Either a storage file name (when `fromStorage` is truthy) or an
 *               inline comma-separated upload string.
 * @param param1 - Object carrying the `chatflowid` forwarded to `getFileFromStorage`.
 * @param fromStorage - When truthy, the contents are fetched through
 *                      `getFileFromStorage(file, chatflowid)`; otherwise `file` is decoded in place.
 * @returns The result of `getFileFromStorage` for stored files, or a `Buffer`
 *          decoded from base64 for inline uploads.
 */
async getFileData(file: string, { chatflowid }: { chatflowid: string }, fromStorage?: boolean) {
    if (fromStorage) {
        return getFileFromStorage(file, chatflowid)
    }

    // Inline upload: the string is split on commas, the last segment is discarded
    // and the segment before it is treated as the base64 payload.
    // NOTE(review): presumably the discarded tail is a `filename:` suffix appended
    // by the uploader — confirm against the caller.
    const segments = file.split(',')
    segments.pop()
    const encodedPayload = segments.pop() || ''
    return Buffer.from(encodedPayload, 'base64')
}
178
119
179
- if ( output === 'document' ) {
180
- return docs
181
- } else {
182
- let finaltext = ''
183
- for ( const doc of docs ) {
184
- finaltext += `${ doc . pageContent } \n`
185
- }
186
- return handleEscapeCharacters ( finaltext , false )
120
/**
 * Loads every configured CSV file and returns the result in the requested output form.
 *
 * Reads `textSplitter`, `columnName`, `metadata` and `omitMetadataKeys` from
 * `nodeData.inputs`, `output` from `nodeData.outputs`, and `chatflowid` from `options`.
 * Each non-empty entry returned by `getFiles` is fetched with `getFileData`, wrapped in a
 * `Blob` and handed to a `CSVLoader`; the loaded documents are collected through
 * `handleDocumentLoaderDocuments`, post-processed by `handleDocumentLoaderMetadata`
 * and finally passed to `handleDocumentLoaderOutput`.
 *
 * @param nodeData - Node configuration holding the inputs and outputs listed above.
 * @param _ - Unused.
 * @param options - Common options; only `chatflowid` is read here.
 * @returns Whatever `handleDocumentLoaderOutput(docs, output)` returns.
 */
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
    const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
    const columnName = nodeData.inputs?.columnName as string | undefined
    const metadata = nodeData.inputs?.metadata
    const output = nodeData.outputs?.output as string
    const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string

    // `columnName` may be absent from the inputs; calling `.trim()` on it directly
    // would throw. A missing or blank value is passed to the loader as `undefined`.
    const column = columnName?.trim() || undefined

    let docs: IDocument[] = []

    const chatflowid = options.chatflowid

    const { files, fromStorage } = this.getFiles(nodeData)

    for (const file of files) {
        if (!file) continue

        const fileData = await this.getFileData(file, { chatflowid }, fromStorage)
        const blob = new Blob([fileData])
        const loader = new CSVLoader(blob, column)

        // Build a new array instead of calling `docs.push(...loaded)`: spreading a very
        // large result into push arguments raises
        // "RangeError: Maximum call stack size exceeded".
        docs = [...docs, ...(await handleDocumentLoaderDocuments(loader, textSplitter))]
    }

    docs = handleDocumentLoaderMetadata(docs, _omitMetadataKeys, metadata)

    return handleDocumentLoaderOutput(docs, output)
}
189
148
}
190
149
0 commit comments