
Commit 60f68a9

Fix CI linting (#128)
* fix lint
* add exit hotfix for llm-tests target

Signed-off-by: Jack Luar <[email protected]>
1 parent 15f7eab commit 60f68a9

18 files changed: +69 additions, −60 deletions

Makefile

Lines changed: 3 additions & 1 deletion
@@ -14,7 +14,9 @@ format:
 
 .PHONY: check
 check:
-	@for folder in $(FOLDERS); do (cd $$folder && make check && cd ../); done
+	@for folder in $(FOLDERS); do \
+		(cd $$folder && make check && cd ../) || exit 1; \
+	done
 	@. ./backend/.venv/bin/activate && \
 	pre-commit run --all-files
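
In a Make recipe, the exit status of a shell for loop is the status of its last iteration, so before this change a failing "make check" in an early folder could be masked by a later folder that passed; "|| exit 1" aborts the loop at the first failure so CI reports it. A minimal Python sketch of the same fail-fast behaviour (the folder list is a placeholder, not the project's actual FOLDERS value):

# fail_fast_check.py -- illustrative only; the folder names are placeholders.
import subprocess
import sys

FOLDERS = ["backend", "frontend", "evaluation"]  # hypothetical folder list

for folder in FOLDERS:
    # Equivalent of "(cd $folder && make check) || exit 1" in the recipe loop:
    result = subprocess.run(["make", "check"], cwd=folder)
    if result.returncode != 0:
        sys.exit(1)  # stop at the first failing folder so the CI job fails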

backend/src/agents/retriever_graph.py

Lines changed: 1 addition & 1 deletion
@@ -121,7 +121,7 @@ def agent(self, state: AgentState) -> dict[str, list[str]]:
             return {"tools": []}
 
         if self.inbuilt_tool_calling:
-            model = self.llm.bind_tools(self.tools, tool_choice="any")
+            model = self.llm.bind_tools(self.tools, tool_choice="any")  # type: ignore
 
             tool_choice_chain = (
                 ChatPromptTemplate.from_template(rephrase_prompt_template)
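
Note on the change above: tool_choice="any" asks the provider to always call at least one of the bound tools, and the added "# type: ignore" silences mypy where the statically known type of self.llm does not declare that keyword. A hedged, self-contained sketch of the pattern (the lookup_docs tool and the helper function are illustrative, not code from this repository):

from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.tools import tool


@tool
def lookup_docs(query: str) -> str:
    """Placeholder tool: search a documentation index for a query."""
    return f"results for {query}"


def bind_retriever_tools(llm: BaseChatModel):
    # tool_choice="any" forces a tool call on providers that support it; the
    # keyword is not part of every bind_tools signature, which is one reason a
    # "# type: ignore" can be needed at call sites like the one above.
    return llm.bind_tools([lookup_docs], tool_choice="any")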

backend/src/chains/hybrid_retriever_chain.py

Lines changed: 4 additions & 2 deletions
@@ -84,9 +84,11 @@ def create_hybrid_retriever(self) -> None:
         if path_flag and database_name in os.listdir(path):
             if database_name in os.listdir(path):
                 similarity_retriever_chain.create_vector_db()
-                similarity_retriever_chain.vector_db.load_db(database_name)
+                similarity_retriever_chain.vector_db.load_db(database_name)  # type: ignore
                 self.vector_db = similarity_retriever_chain.vector_db
-                self.vector_db.processed_docs = similarity_retriever_chain.vector_db.get_documents()
+                self.vector_db.processed_docs = (  # type: ignore
+                    similarity_retriever_chain.vector_db.get_documents()  # type: ignore
+                )
             else:
                 similarity_retriever_chain.embed_docs(return_docs=True)
                 self.vector_db = similarity_retriever_chain.vector_db
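
The "# type: ignore" comments above work around vector_db being declared Optional, so mypy cannot see that create_vector_db() has just populated it. A minimal sketch of the underlying typing issue and the two workarounds used in this commit (the class and attribute names below are illustrative):

from typing import Optional


class FaissVectorDatabase:
    def load_db(self, name: str) -> None:
        """Placeholder for loading a persisted index."""


class RetrieverChain:
    def __init__(self) -> None:
        self.vector_db: Optional[FaissVectorDatabase] = None

    def create_vector_db(self) -> None:
        self.vector_db = FaissVectorDatabase()

    def load(self, name: str) -> None:
        self.create_vector_db()
        # Workaround 1: silence mypy at the call site, as done above.
        self.vector_db.load_db(name)  # type: ignore
        # Workaround 2: narrow the Optional explicitly, as
        # similarity_retriever_chain.py now does with an assert.
        assert self.vector_db is not None
        self.vector_db.load_db(name)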

backend/src/chains/similarity_retriever_chain.py

Lines changed: 26 additions & 19 deletions
@@ -13,6 +13,7 @@
 
 class SimilarityRetrieverChain(BaseChain):
     count = 0
+
     def __init__(
         self,
         llm_model: Optional[
@@ -64,38 +65,44 @@ def embed_docs(
         Optional[list[Document]],
         Optional[list[Document]],
     ]:
+        # Create the vector database if it does not exist
         if self.vector_db is None and extend_existing is False:
            self.create_vector_db()
 
-        if self.markdown_docs_path is not None and self.vector_db is not None:
+        assert (
+            self.vector_db is not None
+        ), "Vector DB must be created before embedding documents."
+        if self.markdown_docs_path is not None:
             self.processed_docs = self.vector_db.add_md_docs(
                 folder_paths=self.markdown_docs_path,
                 chunk_size=self.chunk_size,
                 return_docs=return_docs,
             )
 
-        if self.manpages_path is not None and self.vector_db is not None:
+        if self.manpages_path is not None:
             self.processed_manpages = self.vector_db.add_md_manpages(
                 folder_paths=self.manpages_path, return_docs=return_docs
             )
 
-        if self.other_docs_path is not None and self.vector_db is not None:
-            for folder_name in self.other_docs_path:
-                for root, _, files in os.walk(folder_name):
-                    for file in files:
-                        other_docs_path = os.path.join(root, file)
-                        if other_docs_path.endswith(".pdf"):
-                            self.processed_pdfs = self.vector_db.add_documents(
-                                file_paths=[other_docs_path],
-                                file_type="pdf",
-                                return_docs=return_docs,
-                            )
-                        else:
-                            raise ValueError(
-                                "File type not supported. Only PDFs are supported."
-                            )
-
-        if self.html_docs_path is not None and self.vector_db is not None:
+        if self.other_docs_path is not None:
+            pdf_files = [
+                os.path.join(root, file)
+                for folder_name in self.other_docs_path
+                for root, _, files in os.walk(folder_name)
+                for file in files
+                if file.endswith(".pdf")
+            ]
+
+            if not pdf_files:
+                raise ValueError("File type not supported. Only PDFs are supported.")
+
+            self.processed_pdfs = self.vector_db.add_documents(
+                file_paths=pdf_files,
+                file_type="pdf",
+                return_docs=return_docs,
+            )
+
+        if self.html_docs_path is not None:
             self.processed_html = self.vector_db.add_html(
                 folder_paths=self.html_docs_path,
                 return_docs=return_docs,
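
The PDF handling above now collects every .pdf under the configured folders first and embeds them with a single add_documents call, raising only when no PDFs are found at all. A standalone sketch of the gathering step (the folder names are placeholders):

import os

other_docs_path = ["./docs/pdfs", "./docs/manuals"]  # hypothetical folders

# Walk every configured folder and keep only PDF files, mirroring the
# comprehension introduced in embed_docs().
pdf_files = [
    os.path.join(root, file)
    for folder_name in other_docs_path
    for root, _, files in os.walk(folder_name)
    for file in files
    if file.endswith(".pdf")
]

if not pdf_files:
    raise ValueError("File type not supported. Only PDFs are supported.")

print(f"Found {len(pdf_files)} PDF files to embed")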

backend/src/vectorstores/faiss.py

Lines changed: 4 additions & 2 deletions
@@ -205,10 +205,12 @@ def save_db(self, name) -> None:
 
     def load_db(self, name) -> None:
         load_path = f"{self.get_db_path()}/{name}"
-        self._faiss_db = FAISS.load_local(load_path, self.embedding_model, allow_dangerous_deserialization=True)
+        self._faiss_db = FAISS.load_local(
+            load_path, self.embedding_model, allow_dangerous_deserialization=True
+        )
 
     def get_documents(self) -> list[Document]:
-        return self._faiss_db.docstore._dict.values()
+        return self._faiss_db.docstore._dict.values()  # type: ignore
 
     def process_json(self, folder_paths: list[str]) -> FAISS:
         logging.info("Processing json files...")
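
The change above only wraps the FAISS.load_local call for line length (allow_dangerous_deserialization=True is the langchain_community opt-in for unpickling a locally saved index), while the "# type: ignore" on get_documents reflects that docstore._dict.values() is a dict view rather than the declared list[Document]. A hedged sketch of a variant that satisfies the annotation without the ignore (not the committed code):

from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document


def get_documents(faiss_db: FAISS) -> list[Document]:
    # Materialise the docstore values so the return type really is a list,
    # instead of silencing mypy on the dict view.
    return list(faiss_db.docstore._dict.values())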

evaluation/Makefile

Lines changed: 2 additions & 1 deletion
@@ -30,4 +30,5 @@ clean:
 llm-tests: clean
 	@. .venv/bin/activate && \
 	cd auto_evaluation && \
-	./llm_tests.sh 2>&1 | tee llm_tests_output.txt
+	./llm_tests.sh 2>&1 | tee llm_tests_output.txt; \
+	exit $${PIPESTATUS[0]}
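
Piping through tee normally makes the recipe's exit status that of tee, which almost always succeeds, so failing LLM tests could not fail the llm-tests target; exit $${PIPESTATUS[0]} (a bash-ism) re-raises the test script's own status. A rough Python equivalent, shown only to illustrate the exit-code propagation (the file names are the ones from the recipe):

# Illustrative equivalent of:
#   ./llm_tests.sh 2>&1 | tee llm_tests_output.txt; exit ${PIPESTATUS[0]}
import subprocess
import sys

proc = subprocess.run(
    ["./llm_tests.sh"],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,  # merge stderr into stdout, like 2>&1
)

# "tee": write the captured output to a log file and echo it to the console.
with open("llm_tests_output.txt", "wb") as log:
    log.write(proc.stdout)
sys.stdout.buffer.write(proc.stdout)

# Exit with the test script's status, not the status of the logging step.
sys.exit(proc.returncode)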

evaluation/auto_evaluation/src/models/vertex_ai.py

Lines changed: 10 additions & 9 deletions
@@ -5,8 +5,8 @@
 
 import instructor
 
-from typing import Any
-from vertexai.generative_models import GenerativeModel, HarmBlockThreshold, HarmCategory  # type: ignore
+from typing import Any, Type
+from vertexai.generative_models import GenerativeModel, HarmBlockThreshold, HarmCategory
 from deepeval.models.base_model import DeepEvalBaseLLM
 from pydantic import BaseModel
 
@@ -31,18 +31,20 @@ def load_model(self, *args, **kwargs):
             HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
             HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
         }
+        if not self.model_name:
+            raise ValueError("Model name must be specified for Google Vertex AI.")
 
         return GenerativeModel(
             model_name=self.model_name,
             safety_settings=safety_settings,
         )
 
-    def generate(self, prompt: str, schema: BaseModel) -> Any:
+    def generate(self, prompt: str, schema: Type[BaseModel]) -> Any:
         instructor_client = instructor.from_vertexai(
             client=self.load_model(),
             mode=instructor.Mode.VERTEXAI_TOOLS,
         )
-        resp = instructor_client.messages.create(  # type: ignore
+        resp = instructor_client.messages.create(
             messages=[
                 {
                     "role": "user",
@@ -53,13 +55,12 @@ def generate(self, prompt: str, schema: BaseModel) -> Any:
         )
         return resp
 
-    async def a_generate(self, prompt: str, schema: BaseModel) -> Any:
+    async def a_generate(self, prompt: str, schema: Any) -> Any:
         instructor_client = instructor.from_vertexai(
             client=self.load_model(),
             mode=instructor.Mode.VERTEXAI_TOOLS,
-            _async=True,
         )
-        resp = await instructor_client.messages.create(  # type: ignore
+        resp = await instructor_client.completions.create(
             messages=[
                 {
                     "role": "user",
@@ -71,7 +72,7 @@ async def a_generate(self, prompt: str, schema: BaseModel) -> Any:
         return resp
 
     def get_model_name(self):
-        return self.model_name
+        return self.model_name or "model-not-specified"
 
 
 def main():
@@ -86,7 +87,7 @@ async def main_async():
     model = GoogleVertexAILangChain(model_name="gemini-1.5-pro-002")
     prompt = "Write me a joke"
    print(f"Prompt: {prompt}")
-    response = await model.a_generate(prompt, Response)
+    response = await model.a_generate(prompt, schema=Response)
     print(f"Response: {response}")
 
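
Typing note on the change above: generate is called with the Pydantic model class itself (for example Response), not an instance, so Type[BaseModel] is the accurate annotation for the schema parameter. A small, self-contained illustration (the Response model and helper below are hypothetical):

from typing import Type

from pydantic import BaseModel


class Response(BaseModel):
    joke: str


def describe_schema(schema: Type[BaseModel]) -> str:
    # The class object is passed around (schema=Response), which is why
    # Type[BaseModel] rather than BaseModel is the right annotation.
    return f"structured output will be parsed into {schema.__name__}"


print(describe_schema(Response))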

evaluation/human_evaluation/utils/sheets.py

Lines changed: 2 additions & 2 deletions
@@ -106,10 +106,10 @@ def write_responses(responses: list[str], row_numbers: list[int]) -> int:
         result = (
             service.spreadsheets()
             .values()
-            .batchUpdate(spreadsheetId=SHEET_ID, body=body)  # type: ignore
+            .batchUpdate(spreadsheetId=SHEET_ID, body=body)
             .execute()
         )
-        return result.get("totalUpdatedCells")  # type: ignore
+        return result.get("totalUpdatedCells")
     except HttpError as error:
         st.error("Failed to write responses to the Google Sheet.")
         st.error(f"An error occurred: {error}")

evaluation/human_evaluation/utils/utils.py

Lines changed: 1 addition & 1 deletion
@@ -176,7 +176,7 @@ def update_gform(questions_descriptions: list[dict[str, str]]) -> None:
     form_body = {"requests": requests}
     forms_service.forms().batchUpdate(
         formId=GOOGLE_FORM_ID,
-        body=form_body,  # type: ignore
+        body=form_body,
     ).execute()
 
     st.success("Google Form updated successfully.")

evaluation/init_google.py

Lines changed: 5 additions & 4 deletions
@@ -1,3 +1,4 @@
+# type: ignore
 import os
 import argparse
 from google.oauth2.service_account import Credentials
@@ -47,7 +48,7 @@ def create_google_form(form_title: str, user_email: str) -> str:
     - str: Google Form ID.
     """
     form_metadata = {"info": {"title": form_title}}
-    form = forms_service.forms().create(body=form_metadata).execute()  # type: ignore
+    form = forms_service.forms().create(body=form_metadata).execute()
     form_id = form["formId"]
     print(f"Created Form with ID: {form_id}")
 
@@ -68,13 +69,13 @@ def create_google_sheet(sheet_title: str, user_email: str) -> str:
     - str: Google Sheet ID.
     """
     sheet_metadata = {"properties": {"title": sheet_title}}
-    sheet = sheets_service.spreadsheets().create(body=sheet_metadata).execute()  # type: ignore
+    sheet = sheets_service.spreadsheets().create(body=sheet_metadata).execute()
     sheet_id = sheet["spreadsheetId"]
     print(f"Created Sheet with ID: {sheet_id}")
 
     gc = gspread.authorize(creds)
-    sheet = gc.open_by_key(sheet_id).get_worksheet(0)  # type: ignore
-    sheet.append_row(["Questions", "Generated Answers"])  # type: ignore
+    sheet = gc.open_by_key(sheet_id).get_worksheet(0)
+    sheet.append_row(["Questions", "Generated Answers"])
 
     share_file(sheet_id, user_email)
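
A "# type: ignore" comment on the first line of a module tells mypy to suppress every error reported for that file, which is why the per-call ignores in init_google.py could be dropped once the file-level comment was added. A minimal illustration (the function and call are placeholders):

# type: ignore
# With the file-level ignore above, mypy suppresses all errors in this module,
# so individual lines no longer need their own "# type: ignore" comments.


def add_one(value: int) -> int:
    return value + 1


result = add_one(2.5)  # mypy would flag the float argument, but it is ignored here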
