Skip to content

Commit 7324543

Browse files
authored
Merge pull request #106 from apkostka/feat/zip-code
feat: enter user location before chat
2 parents 7f48311 + b711753 commit 7324543

File tree

16 files changed

+13944
-13600
lines changed

16 files changed

+13944
-13600
lines changed

backend/scripts/create_vector_store.py

Lines changed: 52 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -12,54 +12,72 @@
1212

1313
client = OpenAI(api_key=API_KEY)
1414

15-
# Note: we exit if the vector store already exists because
16-
# OpenAI does not return the filenames of files in a vector store,
17-
# meaning we cannot check if the files we want to upload
18-
# already exist in the vector store.
19-
# If you want to update the vector store, delete it first
20-
# and then run this script again.
2115
# TODO: Would be nice to have a better way to check for the vector store than just the name.
2216
vector_stores = client.vector_stores.list()
2317
if any(store.name == "Oregon Housing Law" for store in vector_stores):
2418
vector_store = next(
2519
store for store in vector_stores if store.name == "Oregon Housing Law"
2620
)
27-
print(
28-
f"Vector store 'Oregon Housing Law' already exists.\n"
29-
f"Add the following to your .env file to use this vector store:\n"
30-
f"VECTOR_STORE_ID={vector_store.id}\n"
21+
# Delete all files in the vector store
22+
vector_store_files = client.vector_stores.files.list(
23+
vector_store_id=vector_store.id
3124
)
32-
exit(1)
25+
for file in vector_store_files:
26+
print(f"Deleting file {file.id} from vector store '{vector_store.name}'.")
27+
client.vector_stores.files.delete(
28+
vector_store_id=vector_store.id, file_id=file.id
29+
)
30+
client.files.delete(file_id=file.id)
3331

3432
else:
3533
print("Creating vector store 'Oregon Housing Law'.")
3634

3735
# Create a new vector store
3836
vector_store = client.vector_stores.create(name="Oregon Housing Law")
3937

40-
# Get all the files in ./documents
41-
documents_path = Path("./scripts/documents")
42-
file_paths = [
43-
f
44-
for f in os.listdir(documents_path)
45-
if os.path.isfile(os.path.join(documents_path, f))
46-
]
38+
# Get list of all directories in ./scripts/documents
39+
documents_path = Path(__file__).parent / "documents"
40+
for dirpath, dirnames, filenames in os.walk(documents_path):
41+
subdir = dirpath.replace(str(documents_path), "").strip(os.sep)
42+
if len(filenames) > 0:
43+
subdirs = (
44+
subdir.split(os.sep) + [None] * 2
45+
) # Ensure we have at least two subdirs
4746

48-
if not file_paths:
49-
print("No text files found in the documents directory.")
50-
exit(1)
47+
attributes = {}
48+
# OpenAI doesn't allow querying by empty attributes, so we set them to "null"
49+
if subdirs[1]:
50+
attributes["city"] = subdirs[1]
51+
else:
52+
attributes["city"] = "null"
53+
if subdirs[0]:
54+
attributes["state"] = subdirs[0]
5155

52-
print("Uploading files to vector store...")
53-
file_streams = [
54-
open(os.path.join(documents_path, path), "rb") for path in file_paths
55-
]
56-
# Add the files to the vector store
57-
file_batch = client.vector_stores.file_batches.upload_and_poll(
58-
vector_store_id=vector_store.id, files=file_streams
59-
)
56+
file_ids = []
57+
for filename in filenames:
58+
file_path = Path(dirpath) / filename
6059

61-
print(f"Uploaded files to vector store '{vector_store.name}'.")
62-
print(
63-
f"Add the following to your .env file to use this vector store:\n"
64-
f"VECTOR_STORE_ID={vector_store.id}\n"
65-
)
60+
# Ensure the file is UTF-8 encoded
61+
# OpenAI rejects the file if not
62+
path = Path(file_path)
63+
path.write_text(path.read_text(encoding="utf-8"), encoding="utf-8")
64+
65+
print(f"Uploading {file_path} to vector store '{vector_store.name}'.")
66+
file = client.files.create(
67+
file=open(file_path, "rb"),
68+
purpose="assistants",
69+
)
70+
file_ids.append(file.id)
71+
72+
# Add files to the vector store
73+
batch_upload = client.vector_stores.file_batches.create(
74+
vector_store_id=vector_store.id,
75+
file_ids=file_ids,
76+
attributes=attributes, # Only take the first two subdirs
77+
)
78+
79+
print(f"Uploaded files to vector store '{vector_store.name}'.")
80+
print(
81+
f"Add the following to your .env file to use this vector store:\n"
82+
f"VECTOR_STORE_ID={vector_store.id}\n"
83+
)

0 commit comments

Comments
 (0)