Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion course-matrix/backend/src/constants/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ export const yearToCode = (year: number) => {

// Set minimum results wanted for a similarity search on the associated namespace.
// Keys are vector-store namespace names; values are the smallest number of
// results that should be requested when searching that namespace.
// A namespace with no entry here yields `undefined` from `.get()`, so callers
// that combine the value with their own `k` need to account for that case.
export const namespaceToMinResults = new Map();
namespaceToMinResults.set("courses_v2", 10);
namespaceToMinResults.set("courses_v3", 10);
namespaceToMinResults.set("offerings", 16); // Typically, more offering info is wanted.
namespaceToMinResults.set("prerequisites", 5);
namespaceToMinResults.set("corequisites", 5);
Expand Down
37 changes: 36 additions & 1 deletion course-matrix/backend/src/constants/promptKeywords.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Keywords related to each namespace
export const NAMESPACE_KEYWORDS = {
courses_v2: [
courses_v3: [
"course",
"class",
"description",
Expand Down Expand Up @@ -61,6 +61,41 @@ export const NAMESPACE_KEYWORDS = {
programs: ["program", "major", "minor", "specialist", "degree", "stream"],
};

/**
 * Phrases that indicate a user is asking about a particular breadth requirement.
 *
 * Each key is a breadth requirement code and each value lists the phrases that
 * are looked for in the user's query. When any phrase of a list is found, the
 * key is handed to `convertBreadthRequirement` to obtain the value used in the
 * `breadth_requirement` metadata filter.
 */
export const BREADTH_REQUIREMENT_KEYWORDS = {
ART_LIT_LANG: [
"ART_LIT_LANG",
"art literature",
"arts literature",
"art language",
"arts language",
"literature language",
"art literature language",
"arts literature language",
],
HIS_PHIL_CUL: [
"HIS_PHIL_CUL",
"history philosophy culture",
"history, philosophy, culture",
"history, philosophy, and culture",
"history, philosophy",
"history philosophy",
"philosophy culture",
"philosophy, culture",
"history culture",
"History, Philosophy and Cultural Studies",
],
SOCIAL_SCI: ["SOCIAL_SCI", "social science", "social sciences"],
NAT_SCI: ["NAT_SCI", "natural science", "natural sciences"],
QUANT: ["QUANT", "quantitative reasoning"],
};

/**
 * Phrases that indicate a user is asking about courses of a particular year level.
 *
 * Each key identifies a year level and each value lists the phrases that are
 * looked for in the user's query (written-out and ordinal forms plus the
 * A/B/C/D-level spellings). When any phrase of a list is found, the key is
 * handed to `convertYearLevel` to obtain the value used in the `year_level`
 * metadata filter.
 */
export const YEAR_LEVEL_KEYWORDS = {
first_year: ["first year", "first-year", "A-level", "A level", "1st year"],
second_year: ["second year", "second-year", "B-level", "B level", "2nd year"],
third_year: ["third year", "third-year", "C-level", "C level", "3rd year"],
fourth_year: ["fourth year", "fourth-year", "D-level", "D level", "4th year"],
};

/**
 * General academic terms that might indicate a search is needed.
 * These are not tied to any single namespace.
 */
export const GENERAL_ACADEMIC_TERMS = ["credit", "enroll", "drop"];

Expand Down
102 changes: 91 additions & 11 deletions course-matrix/backend/src/controllers/aiController.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,14 @@ import {
DEPARTMENT_CODES,
ASSISTANT_TERMS,
USEFUL_INFO,
BREADTH_REQUIREMENT_KEYWORDS,
YEAR_LEVEL_KEYWORDS,
} from "../constants/promptKeywords";
import { CHATBOT_MEMORY_THRESHOLD, codeToYear } from "../constants/constants";
import { namespaceToMinResults } from "../constants/constants";
import OpenAI from "openai";
import { convertBreadthRequirement } from "../utils/convert-breadth-requirement";
import { convertYearLevel } from "../utils/convert-year-level";

const openai = createOpenAI({
baseURL: process.env.OPENAI_BASE_URL,
Expand Down Expand Up @@ -58,8 +62,8 @@ function analyzeQuery(query: string): {

// If a course code is detected, add these namespaces
if (containsCourseCode) {
if (!relevantNamespaces.includes("courses_v2"))
relevantNamespaces.push("courses_v2");
if (!relevantNamespaces.includes("courses_v3"))
relevantNamespaces.push("courses_v3");
if (!relevantNamespaces.includes("offerings"))
relevantNamespaces.push("offerings");
if (!relevantNamespaces.includes("prerequisites"))
Expand All @@ -70,8 +74,8 @@ function analyzeQuery(query: string): {
if (DEPARTMENT_CODES.some((code) => lowerQuery.includes(code))) {
if (!relevantNamespaces.includes("departments"))
relevantNamespaces.push("departments");
if (!relevantNamespaces.includes("courses_v2"))
relevantNamespaces.push("courses_v2");
if (!relevantNamespaces.includes("courses_v3"))
relevantNamespaces.push("courses_v3");
}

// If search is required at all
Expand All @@ -83,7 +87,7 @@ function analyzeQuery(query: string): {
// If no specific namespaces identified & search required, then search all
if (requiresSearch && relevantNamespaces.length === 0) {
relevantNamespaces.push(
"courses_v2",
"courses_v3",
"offerings",
"prerequisites",
"corequisites",
Expand All @@ -106,6 +110,7 @@ async function searchSelectedNamespaces(
query: string,
k: number,
namespaces: string[],
filters?: Object,
): Promise<Document[]> {
let allResults: Document[] = [];

Expand All @@ -127,6 +132,7 @@ async function searchSelectedNamespaces(
const results = await namespaceStore.similaritySearch(
query,
Math.max(k, namespaceToMinResults.get(namespace)),
namespace === "courses_v3" ? filters : undefined,
);
console.log(`Found ${results.length} results in namespace: ${namespace}`);
allResults = [...allResults, ...results];
Expand Down Expand Up @@ -172,16 +178,18 @@ async function reformulateQuery(
- DO replace pronouns and references with specific names and identifiers
- DO include course codes, names and specific details for academic entities
- If the query is not about university courses & offerings, return exactly a copy of the user's query.
- Append "code: " before course codes For example: "CSCC01" -> "code: CSCC01"
- If a course year level is written as "first year", "second year", etc. Then replace "first" with "1st" and "second" with "2nd" etc.

Examples:
User: "When is it offered?"
Output: "When is CSCA48 Introduction to Computer Science offered in the 2024-2025 academic year?"
Output: "When is CSCA48 offered in the 2024-2025 academic year?"

User: "Tell me more about that"
Output: "What are the details, descriptions, and requirements for MATA31 Calculus I?"
Output: "What are the details, descriptions, and requirements for MATA31?"

User: "Who teaches it?"
Output: "Who are the instructors for MGEA02 Introduction to Microeconomics at UTSC?"
Output: "Who are the instructors for MGEA02 at UTSC?"

User: "What are the course names of those codes?"
Output: "What are the course names of course codes: MGTA01, CSCA08, MATA31, MATA35?"
Expand All @@ -192,8 +200,13 @@ async function reformulateQuery(
User: "Give 2nd year math courses."
Output: "What are some 2nd year math courses?"

User: "Give first year math courses."
Output: "What are some 1st year math courses?"`,
User: "Give third year math courses."
Output: "What are some 3rd year math courses?"

User: "What breadth requirement does CSCC01 satisfy?"
Output: "What breadth requirement does code: CSCC01 satisfy?"

`,
},
];

Expand Down Expand Up @@ -227,6 +240,69 @@ async function reformulateQuery(
}
}

/**
 * Determines whether to apply metadata filtering based on the user query and,
 * if so, builds the filter object.
 *
 * The query is scanned for the phrases in BREADTH_REQUIREMENT_KEYWORDS and
 * YEAR_LEVEL_KEYWORDS. Matching is case-insensitive and only counts whole
 * words/phrases: the keyword lists contain entries with capitals (for example
 * "ART_LIT_LANG", "QUANT", "A-level", "D level") that must match a query
 * regardless of how it is capitalised, while the word-boundary check keeps
 * short entries from firing inside unrelated words ("quantum", "second level").
 *
 * @param query - The (reformulated) user query to inspect.
 * @returns An empty object when no keyword is found. Otherwise a filter using
 *   `$or` over every matched `breadth_requirement` value and/or `$or` over
 *   every matched `year_level` value; when both kinds are present the two
 *   groups are combined with `$and`.
 */
function includeFilters(query: string): Record<string, unknown> {
  // toLowerCase (not the locale-aware variant) so matching does not depend on
  // the host's locale settings.
  const lowerQuery = query.toLowerCase();

  const isWordChar = (ch: string): boolean => /[a-z0-9]/.test(ch);

  // True when `keyword` occurs in the query as a standalone word or phrase.
  const mentions = (keyword: string): boolean => {
    const needle = keyword.toLowerCase();
    if (needle.length === 0) return false;

    let at = lowerQuery.indexOf(needle);
    while (at !== -1) {
      const end = at + needle.length;
      const startsClean = at === 0 || !isWordChar(lowerQuery.charAt(at - 1));
      const endsClean =
        end === lowerQuery.length || !isWordChar(lowerQuery.charAt(end));
      if (startsClean && endsClean) return true;
      at = lowerQuery.indexOf(needle, at + 1);
    }
    return false;
  };

  // Keys of `keywordMap` that have at least one keyword mentioned in the query.
  const matchedKeys = (keywordMap: Record<string, string[]>): string[] =>
    Object.entries(keywordMap)
      .filter(([, keywords]) => keywords.some((keyword) => mentions(keyword)))
      .map(([key]) => key);

  const breadthClauses = matchedKeys(BREADTH_REQUIREMENT_KEYWORDS).map(
    (key) => ({ breadth_requirement: { $eq: convertBreadthRequirement(key) } }),
  );
  const yearClauses = matchedKeys(YEAR_LEVEL_KEYWORDS).map((key) => ({
    year_level: { $eq: convertYearLevel(key) },
  }));

  const breadthGroup =
    breadthClauses.length > 0 ? { $or: breadthClauses } : undefined;
  const yearGroup = yearClauses.length > 0 ? { $or: yearClauses } : undefined;

  // Both kinds of constraint must hold together; a single kind stands alone.
  if (breadthGroup && yearGroup) {
    return { $and: [breadthGroup, yearGroup] };
  }
  return breadthGroup ?? yearGroup ?? {};
}

/**
* @description Handles user queries and generates responses using OpenAI's gpt-4o-mini model, with optional knowledge retrieval.
*
* @param {Request} req - The Express request object, containing:
* @param {Object[]} req.body.messages - Array of message objects representing the conversation history.
* @param {string} req.body.messages[].role - The role of the message sender (e.g., "user", "assistant").
* @param {Object[]} req.body.messages[].content - An array containing message content objects.
* @param {string} req.body.messages[].content[].text - The actual text of the message.
*
* @param {Response} res - The Express response object used to stream the generated response.
*
* @returns {void} Responds with a streamed text response of the AI output
*
* @throws {Error} If query reformulation or knowledge retrieval fails.
*/
export const chat = asyncHandler(async (req: Request, res: Response) => {
const { messages } = req.body;
const latestMessage = messages[messages.length - 1].content[0].text;
Expand Down Expand Up @@ -258,11 +334,15 @@ export const chat = asyncHandler(async (req: Request, res: Response) => {
)}`,
);

const filters = includeFilters(reformulatedQuery);
// console.log("Filters: ", JSON.stringify(filters))

// Search only relevant namespaces
const searchResults = await searchSelectedNamespaces(
reformulatedQuery,
3,
relevantNamespaces,
Object.keys(filters).length === 0 ? undefined : filters,
);
// console.log("Search Results: ", searchResults);

Expand All @@ -274,7 +354,7 @@ export const chat = asyncHandler(async (req: Request, res: Response) => {
console.log("Query does not require knowledge retrieval, skipping search");
}

// console.log("CONTEXT: ", context);
console.log("CONTEXT: ", context);

const result = streamText({
model: openai("gpt-4o-mini"),
Expand Down
8 changes: 8 additions & 0 deletions course-matrix/backend/src/routes/aiRouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,13 @@ import { authRouter } from "./authRouter";

// Router that groups the AI assistant endpoints.
export const aiRouter = express.Router();

/**
 * @route POST /api/ai/chat
 * @description Handles user queries and streams an AI-generated response
 * (the chat controller calls the gpt-4o-mini model), with optional knowledge retrieval.
 * `authRouter` is registered ahead of the `chat` handler on this route.
 */
aiRouter.post("/chat", authRouter, chat);
/**
 * @route POST /api/ai/test-similarity-search
 * @description Test vector database similarity search feature
 *
 * NOTE(review): unlike /chat, this route is registered without `authRouter` —
 * confirm that exposing it unauthenticated is intentional.
 */
aiRouter.post("/test-similarity-search", testSimilaritySearch);
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/**
 * Translates a breadth requirement code into its full name.
 *
 * @param code - One of "ART_LIT_LANG", "HIS_PHIL_CUL", "SOCIAL_SCI", "NAT_SCI" or "QUANT".
 * @returns The full breadth requirement name, or an empty string for any other input.
 */
export const convertBreadthRequirement = (code: string) => {
  switch (code) {
    case "ART_LIT_LANG":
      return "Arts, Literature and Language";
    case "HIS_PHIL_CUL":
      return "History, Philosophy and Cultural Studies";
    case "SOCIAL_SCI":
      return "Social and Behavioral Sciences";
    case "NAT_SCI":
      return "Natural Sciences";
    case "QUANT":
      return "Quantitative Reasoning";
    default:
      return "";
  }
};
7 changes: 7 additions & 0 deletions course-matrix/backend/src/utils/convert-year-level.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/**
 * Translates a year-level key into its ordinal label.
 *
 * @param code - One of "first_year", "second_year", "third_year" or "fourth_year".
 * @returns The matching label ("1st year" … "4th year"), or an empty string for any other input.
 */
export const convertYearLevel = (code: string) => {
  switch (code) {
    case "first_year":
      return "1st year";
    case "second_year":
      return "2nd year";
    case "third_year":
      return "3rd year";
    case "fourth_year":
      return "4th year";
    default:
      return "";
  }
};
31 changes: 31 additions & 0 deletions course-matrix/backend/src/utils/embeddings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { PineconeStore } from "@langchain/pinecone";
import { Pinecone } from "@pinecone-database/pinecone";
import config from "../config/config";
import path from "path";
import { convertBreadthRequirement } from "./convert-breadth-requirement";

console.log("Running embeddings process...");

Expand Down Expand Up @@ -37,6 +38,35 @@ async function processCSV(filePath: string, namespace: string) {
});
}

/**
 * Generate embeddings for courses.csv.
 *
 * Loads the CSV, attaches `source`, `row`, `breadth_requirement` and
 * `year_level` metadata to every row document, and stores the documents in the
 * given Pinecone namespace.
 *
 * @param filePath - Path of the CSV file to load.
 * @param namespace - Pinecone namespace the documents are written to.
 */
async function processCoursesCSV(filePath: string, namespace: string) {
  const sourceName = path.basename(filePath);
  const rawDocs = await new CSVLoader(filePath).load();

  // Reads the text after ": " on the given line of a row's page content.
  // Assumes each line has the form "<column>: <value>", with the breadth
  // requirement on line 1 and the year level on line 10 — TODO confirm
  // against the CSV's column order.
  const fieldAt = (pageContent: string, lineNo: number) =>
    pageContent.split("\n")[lineNo].split(": ")[1];

  const docs = rawDocs.map((rawDoc, position) => {
    const { pageContent } = rawDoc;
    return {
      ...rawDoc,
      metadata: {
        ...rawDoc.metadata,
        source: sourceName,
        row: position + 1, // rows are numbered from 1
        breadth_requirement: convertBreadthRequirement(fieldAt(pageContent, 1)),
        year_level: fieldAt(pageContent, 10),
      },
    };
  });
  console.log("Sample doc: ", docs[0]);

  const pineconeIndex = pinecone.Index(process.env.PINECONE_INDEX_NAME!);

  // Store each row as an individual embedding
  await PineconeStore.fromDocuments(docs, embeddings, {
    pineconeIndex: pineconeIndex as any,
    namespace,
  });
}

// Generate embeddings for pdfs
async function processPDF(filePath: string, namespace: string) {
const fileName = path.basename(filePath);
Expand Down Expand Up @@ -98,6 +128,7 @@ async function processPDF(filePath: string, namespace: string) {
// processCSV("../data/tables/offerings_winter_2026.csv", "offerings")
// processCSV("../data/tables/departments.csv", "departments")
// processCSV("../data/tables/courses_with_year.csv", "courses_v2")
// processCoursesCSV("../data/tables/courses_with_year.csv", "courses_v3");

console.log("embeddings done.");

Expand Down