22 changes: 6 additions & 16 deletions backend/scripts/create_vector_store.py
@@ -1,16 +1,8 @@
import os
from pathlib import Path
from openai import OpenAI
from tenantfirstaid.shared import CONFIG


if Path(".env").exists():
from dotenv import load_dotenv

load_dotenv(override=True)

API_KEY = os.getenv("OPENAI_API_KEY", os.getenv("GITHUB_API_KEY"))

client = OpenAI(api_key=API_KEY)
client = OpenAI(api_key=CONFIG.openai_api_key or CONFIG.github_api_key)

# Note: we exit if the vector store already exists because
# OpenAI does not return the filenames of files in a vector store,
@@ -38,21 +30,19 @@
vector_store = client.vector_stores.create(name="Oregon Housing Law")

# Get all the files in ./documents
documents_path = Path("./scripts/documents")
documents_path = Path(__file__).parent / "scripts/documents"
file_paths = [
f
for f in os.listdir(documents_path)
if os.path.isfile(os.path.join(documents_path, f))
for f in documents_path.iterdir()
if f.is_file() and f.suffix.lower() in [".txt"]
]

if not file_paths:
print("No text files found in the documents directory.")
exit(1)

print("Uploading files to vector store...")
file_streams = [
open(os.path.join(documents_path, path), "rb") for path in file_paths
]
file_streams = [path.open("rb") for path in file_paths]
# Add the files to the vector store
file_batch = client.vector_stores.file_batches.upload_and_poll(
vector_store_id=vector_store.id, files=file_streams
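For reference, the new upload flow in this script reduces to the pattern below. This is a minimal sketch, assuming `OPENAI_API_KEY` is set in the environment and a `documents/` folder of `.txt` files sits next to the script; the path and the final print are illustrative, while the `vector_stores` calls are the ones the diff itself uses.

```python
from pathlib import Path
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# Resolve the documents folder relative to this file rather than the CWD,
# matching the diff's switch away from a hard-coded relative path.
documents_path = Path(__file__).parent / "documents"  # illustrative location

# pathlib replaces os.listdir/os.path.isfile: keep only regular .txt files.
txt_files = [
    p for p in documents_path.iterdir()
    if p.is_file() and p.suffix.lower() == ".txt"
]

if txt_files:
    vector_store = client.vector_stores.create(name="Oregon Housing Law")
    # upload_and_poll blocks until every file in the batch has been processed.
    batch = client.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id,
        files=[p.open("rb") for p in txt_files],
    )
    print(batch.status, batch.file_counts)  # illustrative
```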
5 changes: 4 additions & 1 deletion backend/scripts/generate_conversation/chat.py
100644 → 100755
@@ -1,6 +1,8 @@
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = "~=3.11"
# requires-python = "~=3.12"
# dependencies = [
# "dotenv",
# "openai",
# "pandas",
# ]
@@ -12,6 +14,7 @@
from pathlib import Path
import pandas as pd
from typing import Self
# import shared


BOT_INSTRUCTIONS = """Pretend you're a legal expert who is giving advice about eviction notices in Oregon.
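The header touched above is uv's inline script metadata (PEP 723): the `#!/usr/bin/env -S uv run --script` shebang hands the file to `uv run`, which resolves the listed dependencies into a throwaway environment before executing it. A minimal, standalone sketch of the same mechanism (the `requests` dependency and URL are illustrative):

```python
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = "~=3.12"
# dependencies = [
#     "requests",
# ]
# ///
import requests  # resolved by uv at run time, not taken from a project venv

print(requests.get("https://example.com").status_code)
```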
29 changes: 12 additions & 17 deletions backend/scripts/simple_eval.py
@@ -4,24 +4,19 @@
import os
from openai import OpenAI

from tenantfirstaid.shared import DEFAULT_INSTRUCTIONS

API_KEY = os.getenv("OPENAI_API_KEY", os.getenv("GITHUB_API_KEY"))
BASE_URL = os.getenv("MODEL_ENDPOINT", "https://api.openai.com/v1")
MODEL = os.getenv("MODEL_NAME", "o3")
MODEL_REASONING_EFFORT = os.getenv("MODEL_REASONING_EFFORT", "medium")
from tenantfirstaid.shared import CONFIG, DEFAULT_INSTRUCTIONS

client = OpenAI(
api_key=API_KEY,
base_url=BASE_URL,
api_key=CONFIG.openai_api_key or CONFIG.github_api_key,
base_url=CONFIG.model_endpoint,
)


VECTOR_STORE_ID = os.getenv("VECTOR_STORE_ID")
openai_tools = []

if VECTOR_STORE_ID:
openai_tools.append({"type": "file_search", "vector_store_ids": [VECTOR_STORE_ID]})
if CONFIG.vector_store_id is not None:
openai_tools.append(
{"type": "file_search", "vector_store_ids": [CONFIG.vector_store_id]}
)

# 1. Load the dataset - updated to use path relative to this script
script_dir = os.path.dirname(os.path.abspath(__file__))
@@ -47,10 +42,10 @@

# Use the Responses API with streaming
response = client.responses.create(
model=MODEL,
model=CONFIG.model_name,
input=input_messages,
instructions=DEFAULT_INSTRUCTIONS,
reasoning={"effort": MODEL_REASONING_EFFORT},
reasoning={"effort": CONFIG.model_reasoning_effort},
tools=openai_tools,
)

@@ -115,7 +110,7 @@

# 4. Print summary
print("\n===== EVALUATION SUMMARY =====")
print(f"Model evaluated: {MODEL}")
print(f"Model evaluated: {CONFIG.model_name}")
print(f"Number of samples: {len(samples)}")
print(f"Average score: {average_score:.2f}/10")
print(f"Average response time: {average_time:.2f} seconds")
@@ -129,8 +124,8 @@
with open(results_path, "w") as f:
json.dump(
{
"model": MODEL,
"reasoning_effort": MODEL_REASONING_EFFORT,
"model": CONFIG.model_name,
"reasoning_effort": CONFIG.model_reasoning_effort,
"average_score": average_score,
"samples": results,
},
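Putting this file's changes together, the eval's request path now reduces to the sketch below; the sample question and the final print are illustrative, the rest mirrors the diff.

```python
from openai import OpenAI
from tenantfirstaid.shared import CONFIG, DEFAULT_INSTRUCTIONS

client = OpenAI(
    api_key=CONFIG.openai_api_key or CONFIG.github_api_key,
    base_url=CONFIG.model_endpoint,
)

# file_search is only attached when a vector store is configured, so the
# eval still runs against a bare model otherwise.
openai_tools = []
if CONFIG.vector_store_id is not None:
    openai_tools.append(
        {"type": "file_search", "vector_store_ids": [CONFIG.vector_store_id]}
    )

response = client.responses.create(
    model=CONFIG.model_name,
    input=[{"role": "user", "content": "I got a 72-hour notice. What now?"}],
    instructions=DEFAULT_INSTRUCTIONS,
    reasoning={"effort": CONFIG.model_reasoning_effort},
    tools=openai_tools,
)
print(response.output_text)
```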
13 changes: 3 additions & 10 deletions backend/tenantfirstaid/app.py
@@ -1,14 +1,7 @@
from pathlib import Path
from flask import Flask, jsonify, session
from flask import Flask, jsonify, session as flask_session
import os
import secrets


if Path(".env").exists():
from dotenv import load_dotenv

load_dotenv(override=True)

from .chat import ChatView

from .session import TenantSession
@@ -27,15 +20,15 @@

@app.get("/api/history")
def history():
session_id = session.get("session_id")
session_id = flask_session.get("session_id")
if not session_id:
return jsonify([])
return jsonify(tenant_session.get(session_id))


@app.post("/api/clear-session")
def clear_session():
session.clear()
flask_session.clear()
return jsonify({"success": True})


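The import rename here (`session as flask_session`) presumably keeps Flask's cookie-backed session proxy from colliding with the app's own `TenantSession`/`.session` names; the endpoints are unchanged apart from the alias. A minimal sketch of the pattern, with a hard-coded secret standing in for whatever the real app configures:

```python
from flask import Flask, jsonify, session as flask_session

app = Flask(__name__)
app.secret_key = "dev-only-secret"  # illustrative; the real app sets its own

@app.post("/api/clear-session")
def clear_session():
    flask_session.clear()  # clears only Flask's signed-cookie session
    return jsonify({"success": True})
```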
19 changes: 6 additions & 13 deletions backend/tenantfirstaid/chat.py
@@ -7,28 +7,21 @@
from flask.views import View
import os

from .shared import DEFAULT_INSTRUCTIONS, DATA_DIR

DATA_FILE = DATA_DIR / "chatlog.jsonl"

API_KEY = os.getenv("OPENAI_API_KEY", os.getenv("GITHUB_API_KEY"))
BASE_URL = os.getenv("MODEL_ENDPOINT", "https://api.openai.com/v1")
MODEL = os.getenv("MODEL_NAME", "o3")
MODEL_REASONING_EFFORT = os.getenv("MODEL_REASONING_EFFORT", "medium")
from .shared import DEFAULT_INSTRUCTIONS, DATA_DIR, CONFIG


class ChatView(View):
DATA_FILE = DATA_DIR / "chatlog.jsonl"

client = OpenAI(
api_key=API_KEY,
base_url=BASE_URL,
api_key=CONFIG.openai_api_key or CONFIG.github_api_key,
base_url=CONFIG.model_endpoint,
)

def __init__(self, session):
self.session = session

VECTOR_STORE_ID = os.getenv("VECTOR_STORE_ID")
VECTOR_STORE_ID = CONFIG.vector_store_id
NUM_FILE_SEARCH_RESULTS = os.getenv("NUM_FILE_SEARCH_RESULTS", 10)

self.openai_tools = []
@@ -79,10 +72,10 @@ def generate():
try:
# Use the new Responses API with streaming
response_stream = self.client.responses.create(
model=MODEL,
model=CONFIG.model_name,
input=input_messages,
instructions=DEFAULT_INSTRUCTIONS,
reasoning={"effort": MODEL_REASONING_EFFORT},
reasoning={"effort": CONFIG.model_reasoning_effort},
stream=True,
tools=self.openai_tools,
)
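The streaming half of `generate()` is collapsed in this view; for reference, consuming a Responses API stream looks roughly like the sketch below. This is a minimal sketch based on the current openai-python event names, not a copy of the collapsed code, and the yield/error-handling details of the real view are omitted.

```python
def stream_deltas(client, model, input_messages, instructions, tools):
    """Yield assistant text chunks from a streaming Responses API call."""
    stream = client.responses.create(
        model=model,
        input=input_messages,
        instructions=instructions,
        stream=True,
        tools=tools,
    )
    for event in stream:
        # Text fragments arrive as response.output_text.delta events.
        if event.type == "response.output_text.delta":
            yield event.delta
```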
24 changes: 11 additions & 13 deletions backend/tenantfirstaid/session.py
@@ -1,25 +1,23 @@
import os
from valkey import Valkey
import simplejson as json
from .shared import CONFIG
from ipaddress import IPv4Address


class TenantSession:
def __init__(self):
_valkey_args = {
"host": IPv4Address(CONFIG.db_host),
"port": CONFIG.db_port,
"password": CONFIG.db_password,
"ssl": CONFIG.db_use_ssl,
}

print(
"Connecting to Valkey:",
{
"host": os.getenv("DB_HOST"),
"port": os.getenv("DB_PORT"),
"ssl": os.getenv("DB_USE_SSL"),
},
f"Connecting to Valkey: {_valkey_args}",
)
try:
self.db_con = Valkey(
host=os.getenv("DB_HOST", "127.0.0.1"),
port=os.getenv("DB_PORT", 6379),
password=os.getenv("DB_PASSWORD"),
ssl=False if os.getenv("DB_USE_SSL") == "false" else True,
)
self.db_con = Valkey(**_valkey_args)
self.db_con.ping()

except Exception as e:
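Below the fold, the class presumably reads and writes each session's message list as JSON against this connection; the sketch below shows that round trip in isolation. The connection arguments and the `get`/`append` helpers are illustrative, not copied from the diff.

```python
import simplejson as json
from valkey import Valkey

db = Valkey(host="127.0.0.1", port=6379, ssl=False)  # illustrative local setup

def get_messages(session_id: str) -> list:
    raw = db.get(session_id)
    return json.loads(raw) if raw else []

def append_message(session_id: str, message: dict) -> None:
    messages = get_messages(session_id)
    messages.append(message)  # e.g. {"role": "user", "content": "..."}
    db.set(session_id, json.dumps(messages))
```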
67 changes: 64 additions & 3 deletions backend/tenantfirstaid/shared.py
@@ -1,16 +1,77 @@
from collections import defaultdict
import os
from pathlib import Path
from warnings import warn
from dataclasses import dataclass, field
from typing import Optional

CACHE = defaultdict(list)

# Create a dedicated directory for persistent data in root's home directory
if Path(".env").exists():

# configuration and secrets are layered in a dataclass. From lowest to highest priority:
# 1. Dataclass defaults
# 2. Environment variables -- typically used by the Docker container
# 3. .env file in the backend directory (if it exists) -- typically used in local development
# TODO: generate/update .env.example from this dataclass
@dataclass(frozen=True)
class Config:
"""Configuration for the Oregon Tenant First Aid application."""

model_name: str = field(default="o3")
model_reasoning_effort: str = field(default="medium")
vector_store_id: Optional[str] = field(default=None)
feedback_password: Optional[str] = field(default=None)
github_api_key: Optional[str] = field(default=None)
openai_api_key: Optional[str] = field(default=None)
model_endpoint: str = field(default="https://api.openai.com/v1")
use_short_prompts: bool = field(default=True)
db_host: str = field(default="127.0.0.1")
db_port: int = field(default=6379)
db_use_ssl: bool = field(default=True)
db_username: Optional[str] = field(default=None)
db_password: Optional[str] = field(default=None)

def __post_init__(self):
"""Post-initialization to ensure one of the API KEYs is not None."""
if self.github_api_key is None and self.openai_api_key is None:
raise ValueError(
"Either GITHUB_API_KEY or OPENAI_API_KEY must be set in the environment variables."
)


# For development purposes, we expect the .env file to be in the backend directory
__shared_py_path = Path(__file__).resolve()
__backend_path = __shared_py_path.parent.parent
__dotenv_path = __backend_path / ".env"

if Path(__dotenv_path).exists():
from dotenv import load_dotenv

load_dotenv(override=True)
print(f"Loading environment variables from {__dotenv_path}")
load_dotenv(dotenv_path=__dotenv_path, override=True)
else:
warn(
f"No .env file found at {__dotenv_path.parent}. Using environment variables from the system."
)

# Load environment variables into the Config dataclass
CONFIG = Config(
**{
field.lower(): val
for field, val in os.environ.items()
if field.lower() in Config.__dataclass_fields__
}
)

# Create a dedicated directory for persistent data relative to the backend
# directory with a fallback to `/root/tenantfirstaid_data`
DATA_DIR = Path(os.getenv("PERSISTENT_STORAGE_DIR", "/root/tenantfirstaid_data"))
if not DATA_DIR.is_absolute():
new_data_dir = (__backend_path / DATA_DIR).resolve()
warn(
f"DATA_DIR {DATA_DIR} is not an absolute path. It will be relative to the backend directory ({new_data_dir})."
)
DATA_DIR = new_data_dir
DATA_DIR.mkdir(exist_ok=True)


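Taken together, the new `Config` plumbing means any field can be overridden by exporting the matching upper-case variable before `tenantfirstaid.shared` is first imported, with `backend/.env` (when present) loaded on top of that. A hedged sketch of the behaviour; the override values are illustrative, and note that anything sourced from `os.environ` arrives as a string since the dataclass does no type coercion:

```python
import os

# Layer 2 of the priority order: environment variables override the dataclass
# defaults, and are themselves overridden by backend/.env if one exists.
os.environ["MODEL_NAME"] = "o4-mini"         # illustrative override
os.environ["OPENAI_API_KEY"] = "sk-example"  # satisfies Config.__post_init__

from tenantfirstaid.shared import CONFIG  # CONFIG is built at import time

print(CONFIG.model_name)      # "o4-mini" (unless backend/.env overrides it)
print(CONFIG.model_endpoint)  # default "https://api.openai.com/v1", untouched
print(CONFIG.db_port)         # int 6379 by default; a string if DB_PORT is set
```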