|
1 |
| -import os |
2 | 1 | from pathlib import Path
|
3 | 2 | from openai import OpenAI
|
| 3 | +from tenantfirstaid.shared import CONFIG |
4 | 4 |
|
5 |
| - |
6 |
| -if Path(".env").exists(): |
7 |
| - from dotenv import load_dotenv |
8 |
| - |
9 |
| - load_dotenv(override=True) |
10 |
| - |
11 |
| -API_KEY = os.getenv("OPENAI_API_KEY", os.getenv("GITHUB_API_KEY")) |
12 |
| - |
13 |
| -client = OpenAI(api_key=API_KEY) |
| 5 | +client = OpenAI(api_key=CONFIG.openai_api_key or CONFIG.github_api_key) |
14 | 6 |
|
15 | 7 | # Note: we exit if the vector store already exists because
|
16 | 8 | # OpenAI does not return the filenames of files in a vector store,
|
|
38 | 30 | vector_store = client.vector_stores.create(name="Oregon Housing Law")
|
39 | 31 |
|
40 | 32 | # Get all the files in ./documents
|
41 |
| - documents_path = Path("./scripts/documents") |
| 33 | + documents_path = Path(__file__).parent / "scripts/documents" |
42 | 34 | file_paths = [
|
43 | 35 | f
|
44 |
| - for f in os.listdir(documents_path) |
45 |
| - if os.path.isfile(os.path.join(documents_path, f)) |
| 36 | + for f in documents_path.iterdir() |
| 37 | + if f.is_file() and f.suffix.lower() in [".txt"] |
46 | 38 | ]
|
47 | 39 |
|
48 | 40 | if not file_paths:
|
49 | 41 | print("No text files found in the documents directory.")
|
50 | 42 | exit(1)
|
51 | 43 |
|
52 | 44 | print("Uploading files to vector store...")
|
53 |
| - file_streams = [ |
54 |
| - open(os.path.join(documents_path, path), "rb") for path in file_paths |
55 |
| - ] |
| 45 | + file_streams = [path.open("rb") for path in file_paths] |
56 | 46 | # Add the files to the vector store
|
57 | 47 | file_batch = client.vector_stores.file_batches.upload_and_poll(
|
58 | 48 | vector_store_id=vector_store.id, files=file_streams
|
|
0 commit comments