Skip to content

Commit fd8619a

Browse files
Himanshi-Mirosoft, Himanshi Agrawal, and Roopan-Microsoft
authored
fix: Type HTM upload issue fix on explore page under admin (#1172)
Co-authored-by: Himanshi Agrawal <[email protected]>
Co-authored-by: Roopan P M <[email protected]>
1 parent 604b14e commit fd8619a

File tree

3 files changed

+19
-2
lines changed

3 files changed

+19
-2
lines changed

code/backend/batch/utilities/helpers/config/config_helper.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ def get_available_document_types(self) -> list[str]:
63 63
"pdf",
64 64
"url",
65 65
"html",
66 +
"htm",
66 67
"md",
67 68
"jpeg",
68 69
"jpg",

code/backend/batch/utilities/helpers/config/default.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,17 @@
74 74
"strategy": "web"
75 75
}
76 76
},
77 +
{
78 +
"document_type": "htm",
79 +
"chunking": {
80 +
"strategy": "layout",
81 +
"size": 500,
82 +
"overlap": 100
83 +
},
84 +
"loading": {
85 +
"strategy": "web"
86 +
}
87 +
},
77 88
{
78 89
"document_type": "docx",
79 90
"chunking": {

code/tests/utilities/helpers/test_config_helper.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,11 @@ def test_default_config_when_use_advanced_image_processing(env_helper_mock):
211 211
"chunking": expected_chunking,
212 212
"loading": {"strategy": "web"},
213 213
},
214 +
{
215 +
"document_type": "htm",
216 +
"chunking": expected_chunking,
217 +
"loading": {"strategy": "web"},
218 +
},
214 219
{
215 220
"document_type": "docx",
216 221
"chunking": expected_chunking,
@@ -409,7 +414,7 @@ def test_get_available_document_types(config: Config):
409 414

410 415
# then
411 416
assert sorted(document_types) == sorted(
412 -
["txt", "pdf", "url", "html", "md", "jpeg", "jpg", "png", "docx"]
417 +
["txt", "pdf", "url", "html", "htm", "md", "jpeg", "jpg", "png", "docx"]
413 418
)
414 419

415 420

@@ -424,7 +429,7 @@ def test_get_available_document_types_when_advanced_image_processing_enabled(
424 429

425 430
# then
426 431
assert sorted(document_types) == sorted(
427 -
["txt", "pdf", "url", "html", "md", "jpeg", "jpg", "png", "docx", "tiff", "bmp"]
432 +
["txt", "pdf", "url", "html", "htm", "md", "jpeg", "jpg", "png", "docx", "tiff", "bmp"]
428 433
)
429 434

430 435

0 commit comments

Comments
 (0)