Skip to content

Commit 8abf50e

Browse files
committed
Add docx support
1 parent 88af2d2 commit 8abf50e

File tree

2 files changed

+13
-2
lines changed

2 files changed

+13
-2
lines changed

app/src/file_processor.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,16 @@
22
import streamlit as st
33
import os
44
from typing import Optional
5+
from docx import Document
56
#from tests import test_file_ext
67

78
class FileProcessor:
89
def __init__(self):
910
self.file = None
1011
self.file_type: None
1112
self.supported_types = {
12-
".txt": self._read_txt
13+
".txt": self._read_txt,
14+
".docx": self._read_docx
1315
}
1416
self.content = None
1517

@@ -27,6 +29,14 @@ def upload_file(self) -> None:
2729

2830
def _read_txt(self) -> str:
2931
return self.file.getvalue().decode("utf-8")
32+
33+
def _read_docx(self) -> str:
    """Return the text of the .docx held in ``self.file``.

    The file is opened with ``Document`` and the ``text`` of every entry
    in ``paragraphs`` is joined with a newline, in document order.
    """
    document = Document(self.file)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)
39+
3040

3141
def get_content(self) -> Optional[str]:
3242
if not self.file:

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ dotenv
44
requests
55
typing
66
pytest
7-
openai
7+
openai
8+
python-docx

0 commit comments

Comments
 (0)