Skip to content

Commit a9617cd

Browse files
authored
Add HTML parser (#1325)
1 parent 2e79777 commit a9617cd

File tree

1 file changed

+1
-0
lines changed

1 file changed

+1
-0
lines changed

scripts/prepdocs.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,7 @@ async def setup_file_strategy(credential: AsyncTokenCredential, args: Any) -> St
70 70
sentence_text_splitter = SentenceTextSplitter(has_image_embeddings=args.searchimages)
71 71
file_processors = {
72 72
".pdf": FileProcessor(pdf_parser, sentence_text_splitter),
73 +
".html": FileProcessor(doc_int_parser, sentence_text_splitter),
73 74
".json": FileProcessor(JsonParser(), SimpleTextSplitter()),
74 75
".docx": FileProcessor(doc_int_parser, sentence_text_splitter),
75 76
".pptx": FileProcessor(doc_int_parser, sentence_text_splitter),

0 commit comments

Comments
 (0)