|
| 1 | +import '@ungap/with-resolvers'; |
| 2 | + |
| 3 | + |
| 4 | +// import { Mistral } from "@mistralai/mistralai"; |
| 5 | +import {GoogleGenerativeAI} from "@google/generative-ai"; |
| 6 | +import {type ExamDetail } from '@/interface'; |
| 7 | + |
| 8 | +// Type definitions |
| 9 | + |
| 10 | + |
/** One analysis outcome: the structured exam details plus the text they were derived from. */
interface AnalysisResult {
  /** Exam metadata parsed out of the model's reply. */
  examDetail: ExamDetail;
  /** The model's reply text, or an error description when the analysis failed. */
  rawAnalysis: string;
}
| 15 | + |
| 16 | +// type MistralResponse = { |
| 17 | +// choices: Array<{ |
| 18 | +// message: { |
| 19 | +// content: string; |
| 20 | +// }; |
| 21 | +// }>; |
| 22 | +// }; |
| 23 | + |
/**
 * Custom error type for this module.
 *
 * Behaves like a plain `Error` except that `name` is "ProcessingError", so it can be
 * distinguished from other errors when logged or inspected.
 */
class ProcessingError extends Error {
  override name = "ProcessingError";

  constructor(message: string) {
    super(message);
  }
}
| 31 | + |
/**
 * Analyzes a single exam-paper image and returns the extracted exam details.
 *
 * @param imageURL - Image payload, forwarded unchanged to `analyzeImage`.
 * @returns The `examDetail` of the first analysis result, or `undefined` when
 *          the analysis produced no results.
 * @throws {ProcessingError} When `imageURL` is empty.
 */
export default async function processAndAnalyze({
  imageURL,
}: {
  imageURL: string;
}) {
  if (!imageURL) {
    // Throw a real Error rather than a bare string so callers get `.message` and a stack trace.
    throw new ProcessingError("Error Creating the Image");
  }

  const [firstResult] = await analyzeImage(imageURL);
  return firstResult?.examDetail;
}
| 46 | +// export async function pdfToImage(file: File) { |
| 47 | +// // GlobalWorkerOptions.workerSrc = process.cwd() + '/public/pdf.worker.js' |
| 48 | +// const bytes = await file.arrayBuffer(); |
| 49 | + |
| 50 | +// const buffer = Buffer.from(bytes); |
| 51 | +// const singlePage = await PdfToImg(buffer, { |
| 52 | +// returnType: "base64", // accept "base64" and "buffer" |
| 53 | +// imgType: "png", // accept "png" and "jpg" |
| 54 | +// pages: "firstPage", |
| 55 | +// }); |
| 56 | +// return singlePage |
| 57 | +// } |
| 58 | +// Function to convert PDF file's first page to image |
| 59 | + |
| 60 | + |
| 61 | +// Function to convert the downloaded PDF to images |
| 62 | +// async function convertPDFToImages(binaryData: string): Promise<string> { |
| 63 | + |
| 64 | +// const pdf = await PDFDocument.load(binaryData) |
| 65 | +// const bytes = await (await pdf.save()) |
| 66 | +// const buffer = Buffer.from(bytes); |
| 67 | +// const dataUrl = `data:${'application/pdf'};base64,${buffer.toString("base64")}`; |
| 68 | +// return dataUrl |
| 69 | + |
| 70 | +// } |
| 71 | + |
/**
 * Extracts an `ExamDetail` from the model's free-text reply.
 *
 * The reply is searched for a `{ ... }` span, which is parsed as JSON. The
 * `semester` and `year` fields are then normalized:
 *  - a semester that is missing or not one of the four known names becomes "Fall Semester";
 *  - a year that is missing or not exactly four digits becomes the current year;
 *  - a numeric year (e.g. `2023`) is converted to its string form.
 * All other fields are returned exactly as parsed, without validation.
 *
 * Never throws: when no JSON object is found, or parsing fails, the error is
 * logged and a placeholder record with "Unknown" fields is returned.
 *
 * @param analysis - Raw text returned by the model.
 * @returns The parsed and normalized exam details, or the placeholder record.
 */
function parseExamDetail(analysis: string): ExamDetail {
  const validSemesters = ["Fall Semester", "Winter Semester", "Summer Semester", "Weekend Semester"];
  const yearPattern = /^\d{4}$/;
  const currentYear = new Date().getFullYear().toString();

  try {
    // Greedy match from the first "{" to the last "}", so any text or Markdown
    // fences surrounding the JSON object are ignored.
    const jsonMatch = analysis.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      throw new Error("Could not parse exam details from response");
    }

    // The parsed shape is not validated beyond the two fields normalized below.
    const examDetail = JSON.parse(jsonMatch[0]) as ExamDetail;

    // Missing, non-string, or unrecognized semester: fall back to the default.
    if (typeof examDetail.semester !== "string" || !validSemesters.includes(examDetail.semester)) {
      examDetail.semester = "Fall Semester";
    }

    // The model may emit the year as a JSON number; coerce to text before
    // validating so the returned value is always a four-digit string.
    const yearText = String(examDetail.year ?? "");
    examDetail.year = yearPattern.test(yearText) ? yearText : currentYear;

    return examDetail;
  } catch (error) {
    console.error("Error parsing exam details:", error);
    return {
      "course-name": "Unknown",
      slot: "Unknown",
      "course-code": "Unknown",
      "exam-type": "Unknown",
      semester: "Fall Semester",
      year: currentYear,
    };
  }
}
| 108 | + |
/**
 * Converts an image payload into the `{ data, mimeType }` pair used for an inline image part.
 *
 * If `payload` is a base64 data URL (`data:<mime>[;params];base64,<data>`), the
 * prefix is stripped and the declared MIME type is used. Any other string is
 * passed through unchanged as the data, with "image/png" as the MIME type.
 */
function toInlineImage(payload: string): { data: string; mimeType: string } {
  const match = /^data:([^;,]+)(?:;[^;,]+)*;base64,([\s\S]*)$/.exec(payload);
  return {
    data: match?.[2] ?? payload,
    mimeType: match?.[1] ?? "image/png",
  };
}

/**
 * Sends an exam-paper image to the Gemini model ("gemini-1.5-flash") and parses
 * the reply into exam details.
 *
 * Reads the API key from the `GEMINI_API_KEY` environment variable.
 *
 * Never rejects: any failure (missing API key, request error, ...) is logged and
 * reported as a single result whose `examDetail` fields are "Error" placeholders
 * and whose `rawAnalysis` carries the error message.
 *
 * @param dataUrl - The image as a base64 data URL, or as a bare base64 string
 *                  (treated as PNG).
 * @returns An array containing exactly one analysis result.
 */
async function analyzeImage(dataUrl: string): Promise<AnalysisResult[]> {
  try {
    const apiKey = process.env.GEMINI_API_KEY;
    if (!apiKey) {
      throw new ProcessingError("GEMINI_API_KEY environment variable not set");
    }
    const genAI = new GoogleGenerativeAI(apiKey);
    const model = genAI.getGenerativeModel({ model: "gemini-1.5-flash" });

    const prompt = `This is an image of a question paper. I want you to extract the Exam name, there can be three: final assessment test, continuous assessment test 1, continuous assessment test 2.Now Final assessment should be labelled as FAT, Continuous assessment 1 should be labelled as CAT1 and Continuous assessment 2 should be labelled as CAT2. Also I want you to find me the semester it is from, there can be four: Fall Semester, Winter Semester, Summer Semester, Weekend Semester. Fall semester lasts form july to end of the year and winter from january to april, rest is summer semester.Do not put weekend semester if you dont see it in the image. Also find me the year of the exam and the slot of the exam, they look something like this : A1, A1+TA1, B2+BT2, C1+TC1+TCC1 etc.Instead of the entire slot though i just require the initial, alphaber and number part before the plus sign. And I also require the course title and the course code from the paper. Course code looks something like : BCSE202P, BCSE307L etc. if you unable to find return NOT FOUND also format your output into a .json format. most importantly if you are unsure of anything at all just return NOT FOUND`;

    // Inline image data must be bare base64, so strip any data-URL prefix first.
    const image = { inlineData: toInlineImage(dataUrl) };

    const result = await model.generateContent([prompt, image]);
    const rawAnalysis = result.response.text();

    return [{ examDetail: parseExamDetail(rawAnalysis), rawAnalysis }];
  } catch (error: unknown) {
    const errorMessage =
      error instanceof Error ? error.message : "Unknown error occurred";
    console.error("Error analyzing images:", errorMessage);

    // Best-effort contract: report the failure as a placeholder result instead of rejecting.
    return [
      {
        examDetail: {
          "course-name": "Error",
          slot: "Error",
          "course-code": "Error",
          "exam-type": "Error",
          semester: "Fall Semester",
          year: new Date().getFullYear().toString(),
        },
        rawAnalysis: `Error analyzing image: ${errorMessage}`,
      },
    ];
  }
}
| 164 | +// Main function to process everything |
| 165 | +// async function processPDFAndAnalyze(pdfUrl: string): Promise<void> { |
| 166 | +// try { |
| 167 | +// // Convert PDF to images and get the output directory |
| 168 | +// const outputDir = await convertPDFToImages(pdfUrl); |
| 169 | + |
| 170 | +// // Analyze all the generated images |
| 171 | +// const analysisResults = await analyzeImages(outputDir); |
| 172 | + |
| 173 | +// // Save results to a JSON file |
| 174 | +// const resultsPath = join(outputDir, 'analysis_results.json'); |
| 175 | +// fs.writeFileSync( |
| 176 | +// resultsPath, |
| 177 | +// JSON.stringify(analysisResults, null, 2) |
| 178 | +// ); |
| 179 | + |
| 180 | +// console.log('Analysis completed. Results saved to:', resultsPath); |
| 181 | + |
| 182 | +// // Log results to console |
| 183 | +// analysisResults.forEach(result => { |
| 184 | +// console.log(`\nAnalysis for ${result.imageName}:`); |
| 185 | +// console.log('Exam Details:', result.examDetail); |
| 186 | +// console.log('Raw Analysis:', result.rawAnalysis); |
| 187 | +// }); |
| 188 | + |
| 189 | +// } catch (error: unknown) { |
| 190 | +// const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred'; |
| 191 | +// console.error('Error in processing:', errorMessage); |
| 192 | +// throw new ProcessingError(errorMessage); |
| 193 | +// } |
| 194 | +// } |
| 195 | + |
| 196 | +// Example usage - Replace with your PDF URL |
| 197 | +// const pdfUrl = |
| 198 | +// "https://res.cloudinary.com/dtorpaj1c/image/upload/v1731668830/papers/mykcs2yxaman61kx0jvj.pdf"; |
| 199 | + |
| 200 | +// Run the complete process |
| 201 | +// processPDFAndAnalyze(pdfUrl) |
| 202 | +// .then(() => console.log('Complete processing finished')) |
| 203 | +// .catch(error => console.error('Processing failed:', error)); |
| 204 | + |
| 205 | +// Function to download the PDF from the URL |
| 206 | + |
| 207 | +// async function downloadPDF(url: string): Promise<string> { |
| 208 | +// try { |
| 209 | +// const tmpFile = tmp.fileSync({ postfix: '.pdf' }); |
| 210 | +// const response = await axios({ |
| 211 | +// method: 'GET', |
| 212 | +// url, |
| 213 | +// responseType: 'arraybuffer', |
| 214 | +// }); |
| 215 | +// fs.writeFileSync(tmpFile.name, response.data); |
| 216 | +// return tmpFile.name; |
| 217 | +// } catch (error: unknown) { |
| 218 | +// const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred'; |
| 219 | +// console.error('Error downloading PDF:', errorMessage); |
| 220 | +// throw new ProcessingError(errorMessage); |
| 221 | +// } |
| 222 | +// } |
| 223 | + |
| 224 | +// // Function to convert image to base64 |
| 225 | +// function imageToBase64(image: Buffer): string { |
| 226 | +// try { |
| 227 | +// return Buffer.from(image).toString("base64"); |
| 228 | +// } catch (error: unknown) { |
| 229 | +// const errorMessage = |
| 230 | +// error instanceof Error ? error.message : "Unknown error occurred"; |
| 231 | +// throw new ProcessingError( |
| 232 | +// `Error converting image to base64: ${errorMessage}`, |
| 233 | +// ); |
| 234 | +// } |
| 235 | +// } |
0 commit comments