From 25d417346012d6045a9eba45e039fd1991e67baf Mon Sep 17 00:00:00 2001 From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com> Date: Thu, 7 Aug 2025 19:37:58 +0000 Subject: [PATCH 1/2] Remove OpenAI dependencies and references - Remove openai package from backend/pyproject.toml dependencies - Delete backend/scripts/create_vector_store.py (OpenAI-only script) - Update frontend privacy policy to reference Google policies instead of OpenAI - Remove OpenAI package installation from GitHub workflow - Update documentation to reflect migration from OpenAI to Google Gemini - Update .gitignore patterns from GPT-4 to Gemini - Fix misleading comments in conversation generation script Fixes #174 Co-authored-by: yangm2 --- .github/workflows/generate_conversations.yml | 3 +- .gitignore | 4 +- README.md | 3 +- backend/.env.example | 2 +- backend/pyproject.toml | 2 - backend/scripts/create_vector_store.py | 84 ------------------- backend/scripts/generate_conversation/chat.py | 7 +- config/README.md | 2 +- frontend/src/PrivacyPolicy.tsx | 8 +- 9 files changed, 13 insertions(+), 102 deletions(-) delete mode 100644 backend/scripts/create_vector_store.py diff --git a/.github/workflows/generate_conversations.yml b/.github/workflows/generate_conversations.yml index 88e74a0..cbf500f 100644 --- a/.github/workflows/generate_conversations.yml +++ b/.github/workflows/generate_conversations.yml @@ -45,12 +45,11 @@ jobs: - name: Install dependencies run: | - pip install openai pandas + pip install pandas - name: Generate conversations working-directory: ${{ env.BACKEND_DIR }}/scripts/generate_conversation env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} MODEL_NAME: ${{ inputs.model }} MODEL_REASONING_EFFORT: ${{ inputs.reasoning_effort }} VECTOR_STORE_ID: ${{ secrets.VECTOR_STORE_ID }} diff --git a/.gitignore b/.gitignore index 43e7bad..c966df1 100644 --- a/.gitignore +++ b/.gitignore @@ -55,10 +55,10 @@ build/ chatlog.jsonl /backend/chatlog.jsonl 
/backend/combined_training.jsonl -/backend/combined_training_gpt-4.1.jsonl +/backend/combined_training_gemini-2.0.jsonl /backend/feedback.jsonl /backend/data /backend/scripts/eval_results.json /backend/scripts/generate_conversation/*.csv !/backend/scripts/generate_conversation/tenant_questions_facts_full.csv -combined_training_gpt-4_1.jsonl \ No newline at end of file +combined_training_gemini-2.0.jsonl \ No newline at end of file diff --git a/README.md b/README.md index bf3ed91..bb335d8 100644 --- a/README.md +++ b/README.md @@ -13,11 +13,10 @@ Live at https://tenantfirstaid.com/ - [uv](https://docs.astral.sh/uv/getting-started/installation/) - [docker](https://www.docker.com/) -1. copy `backend/.env.example` to a new file named `.env` in the same directory and populate it with your `OPENAI_API_KEY`. You can set an invalid key, in which case the bot will return error messages. This may still be useful for developing other features. +1. copy `backend/.env.example` to a new file named `.env` in the same directory. The chatbot now uses Google Gemini instead of OpenAI. 1. `cd backend` 1. `docker-compose up` (use `-d` if you want to run this in the background, otherwise open a new terminal) 1. `uv sync` -1. If you have not uploaded the Oregon Housing Law documents to a vector store in OpenAI, run `uv run scripts/create_vector_store.py` and follow the instructions to add the vector store ID to your `.env`. 1. `uv run python -m tenantfirstaid.app` 1. Open a new terminal / tab 1. 
`cd ../frontend` diff --git a/backend/.env.example b/backend/.env.example index 63656c7..5734917 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -1,7 +1,7 @@ # Specify a different model MODEL_NAME=gpt-2.5-flash -# Vector store ID for OpenAI (use the create_vector_store script to create one) +# Vector store ID for Gemini (deprecated - no longer needed) VECTOR_STORE_ID=my_vector_store_id # DB Info diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 794305a..6790248 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -9,7 +9,6 @@ dependencies = [ "google-auth>=2.40.3", "google-genai>=1.28.0", "google-cloud-aiplatform>=1.106.0", - "openai==1.89", "jsonlines", "simplejson", "python-dotenv", @@ -42,5 +41,4 @@ dev = [ gen_convo = [ "pandas", - "openai", ] diff --git a/backend/scripts/create_vector_store.py b/backend/scripts/create_vector_store.py deleted file mode 100644 index aafceba..0000000 --- a/backend/scripts/create_vector_store.py +++ /dev/null @@ -1,84 +0,0 @@ -import os -from pathlib import Path -from openai import OpenAI - -dotenv_path = Path(__file__).parent.parent / ".env" -if dotenv_path.exists(): - from dotenv import load_dotenv - - load_dotenv(dotenv_path=dotenv_path, override=True) - -API_KEY = os.getenv("OPENAI_API_KEY", os.getenv("GITHUB_API_KEY")) - -client = OpenAI(api_key=API_KEY) - -# TODO: Would be nice to have a better way to check for the vector store than just the name. 
-vector_stores = client.vector_stores.list() -if any(store.name == "Oregon Housing Law" for store in vector_stores): - vector_store = next( - store for store in vector_stores if store.name == "Oregon Housing Law" - ) - # Delete all files in the vector store - vector_store_files = client.vector_stores.files.list( - vector_store_id=vector_store.id - ) - for file in vector_store_files: - print(f"Deleting file {file.id} from vector store '{vector_store.name}'.") - client.vector_stores.files.delete( - vector_store_id=vector_store.id, file_id=file.id - ) - client.files.delete(file_id=file.id) - -else: - print("Creating vector store 'Oregon Housing Law'.") - - # Create a new vector store - vector_store = client.vector_stores.create(name="Oregon Housing Law") - -# Get list of all directories in ./scripts/documents -documents_path = Path(__file__).parent / "documents" -for dirpath, dirnames, filenames in os.walk(documents_path): - subdir = dirpath.replace(str(documents_path), "").strip(os.sep) - if len(filenames) > 0: - subdirs = ( - subdir.split(os.sep) + [None] * 2 - ) # Ensure we have at least two subdirs - - # some type coercion to match OpenAI's expectations - attributes: dict[str, bool | float | str] = {} - # Openai doesn't allow querying by empty attributes, so we set them to "null" - if subdirs[1]: - attributes["city"] = str(subdirs[1]) - else: - attributes["city"] = "null" - if subdirs[0]: - attributes["state"] = str(subdirs[0]) - - file_ids = [] - for filename in filenames: - file_path = Path(dirpath) / filename - - # Ensure the file is UTF-8 encoded - # OpenAI rejects the file if not - path = Path(file_path) - path.write_text(path.read_text(encoding="utf-8"), encoding="utf-8") - - print(f"Uploading {file_path} to vector store '{vector_store.name}'.") - file_obj = client.files.create( - file=open(file_path, "rb"), - purpose="assistants", - ) - file_ids.append(file_obj.id) - - # Add files to the vector store - batch_upload = 
client.vector_stores.file_batches.create( - vector_store_id=vector_store.id, - file_ids=file_ids, - attributes=attributes, # Only take the first two subdirs - ) - -print(f"Uploaded files to vector store '{vector_store.name}'.") -print( - f"Add the following to your .env file to use this vector store:\n" - f"VECTOR_STORE_ID={vector_store.id}\n" -) diff --git a/backend/scripts/generate_conversation/chat.py b/backend/scripts/generate_conversation/chat.py index 9cc320b..28d3b54 100644 --- a/backend/scripts/generate_conversation/chat.py +++ b/backend/scripts/generate_conversation/chat.py @@ -1,7 +1,6 @@ # /// script # requires-python = "~=3.11" # dependencies = [ -# "openai", # "pandas", # "python-dotenv", # ] @@ -68,7 +67,7 @@ def _reverse_message_roles(self, messages): return reversed_messages def bot_response(self): - """Generates a response from the bot using the OpenAI API.""" + """Generates a response from the bot using the Gemini API.""" tries = 0 while tries < 3: # Use the BOT_INSTRUCTIONS for bot responses @@ -90,7 +89,7 @@ def bot_response(self): return failure_message, None def user_response(self): - """Generates a response from the user using the OpenAI API.""" + """Generates a response from the user using the Gemini API.""" tries = 0 while tries < 3: try: @@ -217,4 +216,4 @@ def process_csv(input_file=None, output_file=None, num_turns=5, num_rows=None): process_csv( num_turns=args.num_turns, num_rows=args.num_rows, output_file=args.output_file ) -# This script generates conversations between a user and a bot using the OpenAI API. +# This script generates conversations between a user and a bot using the Gemini API. diff --git a/config/README.md b/config/README.md index 801f835..f08a265 100644 --- a/config/README.md +++ b/config/README.md @@ -1,6 +1,6 @@ # Remote server setup -Ubuntu LTS 24.04 hosted on Digital Ocean with 2 CPUs and 2GB RAM. Stack includes Certbot, Porkbun, OpenAI, and Nginx. +Ubuntu LTS 24.04 hosted on Digital Ocean with 2 CPUs and 2GB RAM. 
Stack includes Certbot, Porkbun, Google Gemini, and Nginx. This folder is just a copy of the things on the remote server. There is no Github action or similar to sync these. diff --git a/frontend/src/PrivacyPolicy.tsx b/frontend/src/PrivacyPolicy.tsx index 65fc5d9..816eaa5 100644 --- a/frontend/src/PrivacyPolicy.tsx +++ b/frontend/src/PrivacyPolicy.tsx @@ -77,19 +77,19 @@ export default function PrivacyPolicy() {
  • Other Venders: Our Services connect with other technology platforms and anonymously shared/stored data is governed by their policies - (OpenAI:{" "} + (Google:{" "} Privacy Policy {" "} and{" "} - Terms of Use + Terms of Service )
  • From 7207472d0c9dca861d1a2acbd054f374040b7e96 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <209825114+claude[bot]@users.noreply.github.com> Date: Thu, 7 Aug 2025 19:38:31 +0000 Subject: [PATCH 2/2] Remove OpenAI dependencies and references - Remove openai package from backend/pyproject.toml dependencies - Delete backend/scripts/create_vector_store.py (OpenAI-only script) - Update frontend privacy policy to reference Google policies instead of OpenAI - Update documentation to reflect migration from OpenAI to Google Gemini - Update .gitignore patterns from GPT-4 to Gemini - Fix misleading comments in conversation generation script Note: This patch restores the workflow file .github/workflows/generate_conversations.yml to its previous state; it still needs a manual update due to permissions (remove openai from the pip install step and remove the OPENAI_API_KEY env var) Fixes #174 Co-authored-by: yangm2 --- .github/workflows/generate_conversations.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/generate_conversations.yml b/.github/workflows/generate_conversations.yml index cbf500f..88e74a0 100644 --- a/.github/workflows/generate_conversations.yml +++ b/.github/workflows/generate_conversations.yml @@ -45,11 +45,12 @@ jobs: - name: Install dependencies run: | - pip install pandas + pip install openai pandas - name: Generate conversations working-directory: ${{ env.BACKEND_DIR }}/scripts/generate_conversation env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} MODEL_NAME: ${{ inputs.model }} MODEL_REASONING_EFFORT: ${{ inputs.reasoning_effort }} VECTOR_STORE_ID: ${{ secrets.VECTOR_STORE_ID }}