Skip to content

Commit 0c60279

Browse files
committed
Merge remote-tracking branch 'origin/assistants' into react-setup
2 parents 6781a15 + 3869805 commit 0c60279

File tree

9 files changed

+295
-14
lines changed

9 files changed

+295
-14
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ test/
5454
# Miscellaneous
5555
.temp/
5656
.cache/
57+
*.pdf
5758

5859
# Database Files
5960
database.db

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,19 @@ npm run start:dev
5050

5151
This will start a development server with the `ts-node` and `nodemon` packages. This allows for easy development via cold-reloading. `ts-node` allows for running the typescript code without the need for compilation while `nodemon` monitors for changes to any `.ts` or `.js` files.
5252

53+
## Testing
54+
55+
Tests are stored in the `test/` directory and end with `*.test.ts`. Testing is done using jest.js and is set up with npm.
56+
57+
To run the current tests, run this command from the top level of the project directory
58+
```sh
59+
npm test
60+
```
61+
or
62+
```sh
63+
npm run test
64+
```
65+
5366
#### React (Front End)
5467

5568
Simply run the below command in the `web` directory

server/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"devDependencies": {
1616
"@types/axios": "^0.14.4",
1717
"@types/jest": "^29.5.14",
18+
"@types/multer": "^1.4.12",
1819
"@types/jsonwebtoken": "^9.0.7",
1920
"@types/node": "^22.7.6",
2021
"@types/sqlite3": "^3.1.11",
@@ -33,6 +34,7 @@
3334
"cors": "^2.8.5",
3435
"dotenv": "^16.4.5",
3536
"express": "^4.21.1",
37+
"multer": "^1.4.5-lts.1",
3638
"jsonwebtoken": "^9.0.2",
3739
"openai": "^4.67.3",
3840
"sqlite": "^5.1.1",

server/src/gpt-controller.ts

Lines changed: 150 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@ import { GPTModel } from "./enums";
33
import { FileObject } from "openai/resources";
44
import path, { resolve } from "path";
55
import fs from "fs";
6+
import { AssistantBody, GPTResponse, ThreadMessage, GPTData, Testing, TestLocation } from "./types";
7+
import { prompt, questions } from "./prompts.data";
8+
import { threadId } from "worker_threads";
69

7-
class GPTController {
10+
export class GPTController {
811
private static client: OpenAI;
912
private model: GPTModel;
1013

@@ -17,15 +20,121 @@ class GPTController {
1720
this.model = model;
1821
}
1922

20-
async StreamReq() {
21-
const stream = await GPTController.client.chat.completions.create({
23+
async runGPTAnalysis(filePaths: string[]): Promise<GPTResponse[]> {
24+
const assistantParams: AssistantBody = {
25+
name: "Radiation Effects Researcher",
26+
instructions:
27+
"You are a radiation effects reasearcher. Use your knowledge to give very concise and numerical answers to the questions. Please do not give citations.",
2228
model: this.model,
23-
messages: [{ role: "user", content: "Say this is a test" }],
24-
stream: true,
29+
tools: [{ type: "file_search" }],
30+
temperature: 0.1,
31+
};
32+
33+
// Perhaps this should be pulled out to another function
34+
const results: GPTResponse[] = [];
35+
36+
// Upload files and create threads concurrently
37+
const fileThreads = filePaths.map(async (filePath: string) => {
38+
// Pretty sure we need an assistant for each thread to keep it separated.
39+
const fileID = await this.uploadFile(filePath);
40+
const threadMessage: ThreadMessage = {
41+
role: "assistant",
42+
content: prompt + questions,
43+
attachments: [{ file_id: fileID, tools: [{ type: "file_search" }] }],
44+
};
45+
//console.log(`Thread Message: ${threadMessage}`)
46+
// Create the three threads for each paper
47+
let threadResults: GPTData[] = [];
48+
const loopPromises = Array.from({ length: 3 }, async (_) => {
49+
const assistant = await this.createAssistant(assistantParams);
50+
const thread = await this.createThread(threadMessage);
51+
// Run the assistant on the thread and get the prompt results. Think non-stream results are better?
52+
let run = await GPTController.client.beta.threads.runs.createAndPoll(
53+
thread.id,
54+
{
55+
assistant_id: assistant.id,
56+
},
57+
);
58+
var result = "";
59+
if (run.status == "completed") {
60+
const messages =
61+
await GPTController.client.beta.threads.messages.list(
62+
run.thread_id,
63+
);
64+
var n = 1;
65+
for (const message of messages.data.reverse()) {
66+
// Need to check if the message content is text before parsing it
67+
if (message.content[0].type == "text") {
68+
result = message.content[0].text.value;
69+
var resvalues: GPTData = { // Initialize GPT data object
70+
paper_name: "",
71+
year: 0,
72+
author: [],
73+
part_no: "",
74+
type: [],
75+
manufacturer: "",
76+
testing_location: "Terrestrial",
77+
testing_type: "TID",
78+
data_type: 0
79+
}
80+
// Every second message has the data values
81+
if(n % 2 == 0) {
82+
// console.log(`${message.role} > ${result}`);
83+
let preres = result.split("ø").map((s) => s.replace("\n", ""));
84+
// console.log(preres)
85+
resvalues = {
86+
paper_name: preres[0],
87+
year: parseInt(preres[1]),
88+
author: preres[2].split(","),
89+
part_no: preres[3],
90+
type: preres[4].split("¶"),
91+
manufacturer: preres[5],
92+
testing_location: <TestLocation>preres[6],
93+
testing_type: <Testing>preres[7], // TODO: this gives a list ("TID, TID, DD") sometimes so the cast may fail
94+
data_type: 0 // TODO: add a prompt to get data_type
95+
};
96+
console.log(resvalues)
97+
threadResults.push(resvalues);
98+
}
99+
n++;
100+
}
101+
}
102+
} else {
103+
console.log(run.status);
104+
}
105+
});
106+
107+
// Wait for all loop iterations to finish
108+
await Promise.all(loopPromises);
109+
110+
const threadFinal: GPTResponse = {
111+
pass_1: threadResults[0],
112+
pass_2: threadResults[1],
113+
pass_3: threadResults[2],
114+
};
115+
116+
//console.log(threadFinal)
117+
results.push(threadFinal);
118+
119+
// TODO: Need to add the stream and and return it, not working yet.
120+
// Will be uncommented to implement
121+
122+
/*
123+
const stream = await GPTController.client.beta.threads.runs.create(
124+
thread.id,
125+
{assistant_id: assistant.id, stream: true}
126+
127+
)
128+
129+
let response = '';
130+
for await (const chunk of stream) {
131+
response += chunk.choices[0]?.delta?.content || "";
132+
}
133+
*/
25134
});
26-
for await (const chunk of stream) {
27-
process.stdout.write(chunk.choices[0]?.delta?.content || "");
28-
}
135+
136+
await Promise.all(fileThreads);
137+
return results;
29138
}
30139

31140
/*
@@ -43,7 +152,39 @@ class GPTController {
43152
file: fileStream,
44153
purpose: "assistants",
45154
});
46-
155+
console.log("uploadFile: ", response);
47156
return response.id; // Return the uploaded file ID
48157
}
158+
159+
/*
160+
* Parameters:
161+
* - assistantDetails: an instance of AssistantBody containing the required info to create an assistant
162+
* Function: Creates a new assistant
163+
* Returns:
164+
* - OpenAI.Beta.Assistants.Assistant: The new assistant instance
165+
*/
166+
private async createAssistant(
167+
assistantDetails: AssistantBody,
168+
): Promise<OpenAI.Beta.Assistants.Assistant> {
169+
const assistant = await GPTController.client.beta.assistants.create(
170+
assistantDetails,
171+
);
172+
return assistant;
173+
}
174+
175+
/*
176+
* Parameters:
177+
* - threadMessage: an instance of ThreadMessage containing the required info to create a new message
178+
* Function: Creates a new thread with an accompanied message
179+
* Returns:
180+
* - OpenAI.Beta.Thread: The new thread
181+
*/
182+
private async createThread(
183+
threadMessage: ThreadMessage,
184+
): Promise<OpenAI.Beta.Thread> {
185+
const thread = await GPTController.client.beta.threads.create({
186+
messages: [threadMessage],
187+
});
188+
return thread;
189+
}
49190
}

server/src/index.ts

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,21 @@ import cors from "cors";
44
import bodyParser from "body-parser";
55
import dotenv from "dotenv";
66
import { open, Database } from "sqlite";
7+
import dotenv from "dotenv"
78

89
// Import routers
910

1011
import exampleRouter from "./routes/example-router";
1112
import cascadeRouter from "./routes/cascade-router";
1213
import { DatabaseController } from "./database-controller";
14+
1315
import adminRouter from "./routes/admin-router";
16+
import { GPTController } from "./gpt-controller";
17+
import { GPTModel } from "./enums";
1418

1519
const app = express();
1620
const PORT = process.env.PORT || 3000; // Use environment variable if available, otherwise default to 3000
17-
dotenv.config();
21+
dotenv.config();dotenv.config();
1822
/* In the future this will be used to ensure that only requests from certain domains are accepted
1923
const corsOptions = {
2024
origin: (origin: string | undefined, callback: (err: Error | null, allowed: boolean) => void) => {
@@ -34,19 +38,19 @@ const corsOptions = {
3438
app.use(cors(corsOptions));
3539
app.use(bodyParser.json());
3640

37-
async function initializeSystem(): Promise<DatabaseController> {
41+
async function initializeSystem(): Promise<{dbController: DatabaseController, gptController: GPTController}> {
3842
const db = await open({
3943
filename: "./database.db",
4044
driver: sqlite3.Database,
4145
});
42-
return new DatabaseController(db);
46+
return {dbController: new DatabaseController(db), gptController: new GPTController(GPTModel.GPT3_5Turbo)};
4347
}
4448

45-
initializeSystem().then((dbController: DatabaseController) => {
49+
initializeSystem().then(({dbController, gptController}) => {
4650
app.use("/", exampleRouter);
4751
//app.use("/getTable", tableRouter)
4852
app.use("/api/dataRequest", cascadeRouter(dbController));
49-
app.use("/api/adminRequest", adminRouter(dbController));
53+
app.use("/api/adminRequest", adminRouter(dbController, gptController));
5054

5155
app.listen(PORT, () => {
5256
console.log(`Server is running on ${PORT}`);

server/src/prompts.data.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
export const questions = [
2+
"What is the title of the paper",
3+
"Which year was the paper published",
4+
"What are all of the author's names, in the format (J. Doe) in a list like this ['J. Doe', 'R. Austin']",
5+
"What is the Part No. or name if that is not available",
6+
'What is the type of part (eg, switching regulator), if there are multiple part numbers listed, list them all and seperate them with a "¶"',
7+
"Who is the manufacturer",
8+
'What is the type of testing location: Respond to this question with "Terrestrial" for a terrestial testing location, or "Flight" for a flight testing location',
9+
'What type of testing was done: Respond to this question with "TID" for Total Ionizing Dose testing, "SEE" for heavy ion, proton, laser, or neutron testing, or "OTHER" if you are not completely 100% sure',
10+
];
11+
12+
export const prompt = `Please answer the following questions, as concisely as possible, and with a heavy emphasis on numbers instead of words.\n
13+
Use standard text and do not provide citations for each of your answers.
14+
Answer each question, and separate the answers with a "ø" character as a delimiter.
15+
If you are unable to answer the question accurately, provide the answer N/A.\n`;
16+
17+
export const Other_targeted_questions = [
18+
"What type was the radiation source",
19+
"Were there any failures, if so, when?",
20+
];
21+
22+
export const TID_targeted_questions = [
23+
"What type was the radiation source",
24+
"What was the total dose",
25+
"Were there any failures, if so, when?",
26+
];
27+
28+
export const SEE_targeted_questions = [
29+
"What type was the radiation source",
30+
"What the energy of the source",
31+
"Were there any failures, if so, when?",
32+
];
33+
34+
export const targeted_prompt = `Please answer the following questions, as concisely as possible, and with a heavy emphasis on numbers instead of words.
35+
Use standard text and do not provide citations for each of your answers.
36+
Answer each question, and separate the answers with a "ø" character as a delimiter.
37+
If you are unable to answer the question accurately, provide the answer N/A.\n`;
38+
39+
export const sort_questions = `There are five types of papers:
40+
The first are \"Laboratory Capabilities/Facility Equipment/Simulator\", which detail the capacities of a location, or university for use in research.
41+
The second are \"Testing Methods\", which detail specific methods of testing, without any devices being tested in the paper.
42+
The third are \"Phenomenons/Theory Papers_Sorted\", which detail theories or phenomenons that occur on a wide variety of devices, without doing specific testing on a device.
43+
The fourth are \"Compendiums\", which are collections of concise but detailed data on a large variety of devices. The devices must have their part numbers listed to be in this category.
44+
The fifth are \"Single/Multiple Device Testing\", which are papers that test one or more devices with one or more types of radiation. The device must have a part number to be in this category.
45+
In the same order, respond with \"LAB\", \"TST\", \"PHE\", \"CMP\", or \"SMD\", for the category that the paper best fits.`;
46+
47+
export const sort_prompts = `Please answer the following question, as concisely as possible, with a single word answer as outlined in the question.
48+
Classify this paper into one of the following categories: """ + questions + """
49+
Use standard text and do not provide citations for each of your answer.
50+
Answer the question with the keyword for one of the 5 papers.
51+
If you are unable to answer the question accurately, provide the answer N/A.`;

server/src/types.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
// TODO: Add proper values to data types
22

3+
import OpenAI from "openai";
4+
import { GPTModel } from "./enums";
5+
36
export type GetQuery = {
47
paper_name?: string;
58
author?: string;
@@ -35,6 +38,40 @@ export type RadData = {
3538
data_type: number;
3639
};
3740

41+
export type AssistantBody = {
42+
name: string;
43+
description?: string;
44+
instructions: string;
45+
model: GPTModel;
46+
tools: OpenAI.Beta.Assistants.AssistantTool[];
47+
temperature: number;
48+
response_format?: OpenAI.Beta.Threads.AssistantResponseFormatOption;
49+
};
50+
51+
export type GPTData = {
52+
paper_name: string;
53+
year: number;
54+
author: string[];
55+
part_no: string;
56+
type: string[];
57+
manufacturer: string;
58+
testing_location: TestLocation;
59+
testing_type: Testing;
60+
data_type: number;
61+
}
62+
63+
export type GPTResponse = {
64+
pass_1: GPTData,
65+
pass_2: GPTData,
66+
pass_3: GPTData
67+
}
68+
69+
export type ThreadMessage = {
70+
role: "user" | "assistant";
71+
content: string;
72+
attachments: OpenAI.Beta.Threads.ThreadCreateParams.Message.Attachment[];
73+
};
74+
3875
// Type of testing done
3976
export type TestLocation = "Terrestrial" | "Flight";
4077

0 commit comments

Comments
 (0)