Skip to content

Commit 6d35e35

Browse files
committed
Change course embeddings to include year level
1 parent d3ed76a commit 6d35e35

File tree

6 files changed

+2199
-10
lines changed

6 files changed

+2199
-10
lines changed

course-matrix/backend/src/constants/constants.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ export const yearToCode = (year: number) => {
3939

4040
// Set minimum results wanted for a similarity search on the associated namespace.
4141
export const namespaceToMinResults = new Map();
42-
namespaceToMinResults.set("courses", 10);
42+
namespaceToMinResults.set("courses_v2", 10);
4343
namespaceToMinResults.set("offerings", 16); // Typically, more offering info is wanted.
4444
namespaceToMinResults.set("prerequisites", 5);
4545
namespaceToMinResults.set("corequisites", 5);

course-matrix/backend/src/constants/promptKeywords.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// Keywords related to each namespace
22
export const NAMESPACE_KEYWORDS = {
3-
courses: [
3+
courses_v2: [
44
"course",
55
"class",
66
"description",

course-matrix/backend/src/controllers/aiController.ts

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ function analyzeQuery(query: string): {
5858

5959
// If a course code is detected, add these namespaces
6060
if (containsCourseCode) {
61-
if (!relevantNamespaces.includes("courses"))
62-
relevantNamespaces.push("courses");
61+
if (!relevantNamespaces.includes("courses_v2"))
62+
relevantNamespaces.push("courses_v2");
6363
if (!relevantNamespaces.includes("offerings"))
6464
relevantNamespaces.push("offerings");
6565
if (!relevantNamespaces.includes("prerequisites"))
@@ -70,8 +70,8 @@ function analyzeQuery(query: string): {
7070
if (DEPARTMENT_CODES.some((code) => lowerQuery.includes(code))) {
7171
if (!relevantNamespaces.includes("departments"))
7272
relevantNamespaces.push("departments");
73-
if (!relevantNamespaces.includes("courses"))
74-
relevantNamespaces.push("courses");
73+
if (!relevantNamespaces.includes("courses_v2"))
74+
relevantNamespaces.push("courses_v2");
7575
}
7676

7777
// If search is required at all
@@ -83,7 +83,7 @@ function analyzeQuery(query: string): {
8383
// If no specific namespaces identified & search required, then search all
8484
if (requiresSearch && relevantNamespaces.length === 0) {
8585
relevantNamespaces.push(
86-
"courses",
86+
"courses_v2",
8787
"offerings",
8888
"prerequisites",
8989
"corequisites",
@@ -154,7 +154,7 @@ async function reformulateQuery(
154154
apiKey: process.env.OPENAI_API_KEY,
155155
});
156156

157-
console.log("History: ", conversationHistory);
157+
// console.log("History: ", conversationHistory);
158158

159159
// Create messages array with the correct type structure
160160
const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
@@ -187,7 +187,13 @@ async function reformulateQuery(
187187
Output: "What are the course names of course codes: MGTA01, CSCA08, MATA31, MATA35?"
188188
189189
User: "How are you doing today?"
190-
Output: "How are you doing today?"`,
190+
Output: "How are you doing today?"
191+
192+
User: "Give 2nd year math courses."
193+
Output: "What are some 2nd year math courses?"
194+
195+
User: "Give first year math courses."
196+
Output: "What are some 1st year math courses?"`,
191197
},
192198
];
193199

@@ -209,7 +215,7 @@ async function reformulateQuery(
209215
model: "gpt-4o-mini",
210216
messages: messages,
211217
temperature: 0.1, // Lower temperature for more consistent, focused queries
212-
max_tokens: 150, // Limit response length
218+
max_tokens: latestQuery.length * 3, // Limit response length. Proportional to user input.
213219
top_p: 0.5, // Reduced top_p for more focused outputs
214220
});
215221

course-matrix/backend/src/utils/embeddings.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ async function processPDF(filePath: string, namespace: string) {
9797
// processCSV("../data/tables/offerings_summer_2025.csv", "offerings")
9898
// processCSV("../data/tables/offerings_winter_2026.csv", "offerings")
9999
// processCSV("../data/tables/departments.csv", "departments")
100+
// processCSV("../data/tables/courses_with_year.csv", "courses_v2")
100101

101102
console.log("embeddings done.");
102103

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import csv
2+
import sys
3+
4+
def get_year_level(code):
    """Return the year-level label encoded in a course code.

    The fourth character of the code selects the label:
    'A' -> "1st year", 'B' -> "2nd year", 'C' -> "3rd year", 'D' -> "4th year".
    Surrounding whitespace is ignored and the letter is matched
    case-insensitively.

    Args:
        code: Course code such as "CSCA08". ``None`` and other non-string
            values are tolerated, because ``csv.DictReader`` yields ``None``
            for fields missing from a short row.

    Returns:
        The year-level label, or "" when the code is not a string, is shorter
        than four characters, or its fourth character is not A-D.
    """
    year_levels = {
        'A': "1st year",
        'B': "2nd year",
        'C': "3rd year",
        'D': "4th year",
    }

    # Guard against None / non-string cells instead of crashing on len().
    if not isinstance(code, str):
        return ""

    normalized = code.strip()
    if len(normalized) < 4:
        return ""

    return year_levels.get(normalized[3].upper(), "")
16+
17+
def process_csv(input_file, output_file):
    """Copy a CSV file, adding a ``year_level`` column derived from ``code``.

    Each row's ``year_level`` is computed by passing its ``code`` value to
    ``get_year_level``. If the input already has a ``year_level`` column it is
    recomputed in place rather than duplicated in the header.

    Args:
        input_file: Path of the UTF-8 CSV to read; it must have a header row
            containing a ``code`` column.
        output_file: Path of the UTF-8 CSV to write (overwritten if present).

    Raises:
        ValueError: If the input has no header row or no ``code`` column.
            The check runs before the output file is opened, so a bad input
            never creates or truncates the output.
    """
    with open(input_file, 'r', newline='', encoding='utf-8') as infile:
        reader = csv.DictReader(infile)

        # fieldnames is None for an empty file; fail with a clear message
        # instead of a TypeError from list concatenation.
        header = reader.fieldnames
        if not header:
            raise ValueError(f"{input_file} is empty or has no header row.")
        if 'code' not in header:
            raise ValueError(f"{input_file} has no 'code' column.")

        fieldnames = list(header)
        if 'year_level' not in fieldnames:
            fieldnames.append('year_level')

        with open(output_file, 'w', newline='', encoding='utf-8') as outfile:
            writer = csv.DictWriter(outfile, fieldnames=fieldnames)
            writer.writeheader()

            for row in reader:
                row['year_level'] = get_year_level(row['code'])
                writer.writerow(row)

    print(f"Successfully processed {input_file} and created {output_file} with year_level column added.")
31+
32+
if __name__ == "__main__":
    # Use the first two command-line arguments as input/output paths when both
    # are supplied; otherwise fall back to the default table locations.
    cli_args = sys.argv[1:]
    if len(cli_args) >= 2:
        source_path, target_path = cli_args[0], cli_args[1]
    else:
        source_path = "./tables/courses.csv"
        target_path = "./tables/courses_with_year.csv"
    process_csv(source_path, target_path)

0 commit comments

Comments
 (0)