@@ -178,63 +178,51 @@ def test_default_config_is_cached():
178
178
assert default_config_one is default_config_two
179
179
180
180
181
@patch("backend.batch.utilities.helpers.config.config_helper.EnvHelper")
def test_default_config_when_use_advanced_image_processing(env_helper_mock):
    """Check the default document processors when advanced image processing is on."""
    # given
    # Clear the class-level default config first; the preceding test asserts
    # that repeated calls return the same object, so a stale value could
    # otherwise be returned here instead of one built from the mocked env.
    ConfigHelper._default_config = None
    env_helper_mock.return_value.USE_ADVANCED_IMAGE_PROCESSING = True

    # when
    config = ConfigHelper.get_default_config()
    actual_processors = config["document_processors"]

    # then
    layout_chunking = {"strategy": "layout", "size": 500, "overlap": 100}
    layout_loading = {"strategy": "layout"}
    web_loading = {"strategy": "web"}

    def processor(document_type, loading):
        # Entry shape shared by every processor that uses layout chunking.
        return {
            "document_type": document_type,
            "chunking": layout_chunking,
            "loading": loading,
        }

    # The list is compared with ==, so the order below is significant.
    expected_processors = [processor("pdf", layout_loading)]
    expected_processors.extend(
        processor(document_type, web_loading)
        for document_type in ("txt", "url", "md", "html", "htm")
    )
    expected_processors.append(processor("docx", {"strategy": "docx"}))
    expected_processors.append(
        {
            "document_type": "json",
            "chunking": {"strategy": "json", "size": 500, "overlap": 100},
            "loading": web_loading,
        }
    )
    expected_processors.extend(
        processor(document_type, layout_loading)
        for document_type in ("jpg", "jpeg", "png")
    )
    # Image types appear again with the advanced-image-processing flag set.
    expected_processors.extend(
        {
            **processor(document_type, layout_loading),
            "use_advanced_image_processing": True,
        }
        for document_type in ("jpeg", "jpg", "png", "tiff", "bmp")
    )

    assert actual_processors == expected_processors
225
+
238
226
239
227
def test_get_config_from_azure (
240
228
AzureBlobStorageClientMock : MagicMock ,
0 commit comments