1
+ import { fromPath } from 'pdf2pic' ;
2
+ import { existsSync , mkdirSync } from 'fs' ;
3
+ import { join , dirname } from 'path' ;
4
+ import axios from 'axios' ;
5
+ import * as tmp from 'tmp' ;
6
+ import * as fs from 'fs' ;
7
+ import { Mistral } from "@mistralai/mistralai" ;
8
+ import { readdir } from 'fs/promises' ;
9
+ import dotenv from 'dotenv' ;
10
+
11
+ // Load environment variables
12
// Read the local .env file into process.env up front; analyzeImages later
// looks up MISTRAL_API_KEY from process.env.
dotenv.config();
13
+
14
+ // Type definitions
15
/**
 * Exam metadata extracted from a question-paper image.
 *
 * Every field is a free-form string produced by the model. The parsing
 * fallback fills fields with "Unknown" and the analysis failure path fills
 * them with "Error", so consumers should not assume the values are valid.
 */
type ExamDetail = {
  /** Full course title. */
  'course-name': string;
  /** Timetable slot; the prompt asks for one of A1..G2. */
  'slot': string;
  /** Course code; the prompt asks for department letters followed by numbers. */
  'course-code': string;
  /** Exam category; the prompt asks for the final or a continuous assessment test. */
  'exam-type': string;
};
21
+
22
/** Outcome of analyzing a single page image. */
type AnalysisResult = {
  /** File name of the analyzed image ('error.png' when analysis failed). */
  imageName: string;
  /** Structured exam metadata parsed out of the model's reply. */
  examDetail: ExamDetail;
  /** Unparsed model reply, or an error description when analysis failed. */
  rawAnalysis: string;
};
27
+
28
/**
 * Minimal shape of the chat-completion response that this file reads.
 * analyzeImages casts the SDK result to this type and only looks at the
 * text content of the first choice.
 */
type MistralResponse = {
  choices: {
    message: {
      content: string;
    };
  }[];
};
35
+
36
+ // Custom error type
37
/**
 * Error raised by the download / conversion / analysis steps of this file.
 * It carries only a message; the distinct `name` lets logs and callers tell
 * it apart from a plain Error.
 */
class ProcessingError extends Error {
  /**
   * @param message Human-readable description of what went wrong.
   */
  constructor(message: string) {
    super(message);
    // Replace the inherited "Error" label so stack traces show this class.
    this.name = 'ProcessingError';
  }
}
43
+
44
+ // Function to ensure output directory exists
45
/**
 * Makes sure the `output_images` directory exists under the current working
 * directory and returns its absolute path.
 *
 * `mkdirSync` with `recursive: true` succeeds when the directory is already
 * there, so no separate existence check is needed. Dropping the check removes
 * a check-then-create race and makes the call fail loudly if the path exists
 * but is not a directory.
 *
 * @returns Absolute path of the output directory.
 * @throws If the directory cannot be created (for example, a regular file
 *         already occupies the path).
 */
function ensureOutputDirectory(): string {
  const outputDir = join(process.cwd(), 'output_images');
  mkdirSync(outputDir, { recursive: true });
  return outputDir;
}
52
+
53
+ // Function to download the PDF from the URL
54
/**
 * Downloads the PDF at `url` into a fresh temporary `.pdf` file.
 *
 * The temporary file is created only after the download has succeeded, and it
 * is removed again if writing fails, so a failed call leaves nothing behind.
 * The caller owns the returned file and is responsible for deleting it.
 *
 * @param url Location of the PDF to fetch with an HTTP GET.
 * @returns Path of the temporary file holding the downloaded bytes.
 * @throws ProcessingError carrying the underlying error message when the
 *         download or the write fails.
 */
async function downloadPDF(url: string): Promise<string> {
  // Stays undefined until the temp file exists, so the catch block knows
  // whether there is anything to clean up.
  let tmpFile: tmp.FileResult | undefined;
  try {
    const response = await axios({
      method: 'GET',
      url,
      responseType: 'arraybuffer',
    });

    // discardDescriptor closes the descriptor tmp opens; the file is written
    // by name below, so keeping that descriptor would only leak it.
    tmpFile = tmp.fileSync({ postfix: '.pdf', discardDescriptor: true });
    fs.writeFileSync(tmpFile.name, response.data);
    return tmpFile.name;
  } catch (error: unknown) {
    if (tmpFile !== undefined) {
      try {
        tmpFile.removeCallback();
      } catch {
        // Best-effort cleanup; the original failure is the one worth reporting.
      }
    }
    const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
    console.error('Error downloading PDF:', errorMessage);
    throw new ProcessingError(errorMessage);
  }
}
70
+
71
+ // Function to convert the downloaded PDF to images
72
/**
 * Downloads a PDF and renders its pages as PNG files in the output directory.
 *
 * The downloaded PDF is a temporary file that is only needed during the
 * conversion, so it is deleted afterwards whether the conversion succeeded
 * or not.
 *
 * @param pdfUrl Location of the PDF to download and convert.
 * @returns Path of the directory the page images were written to.
 * @throws ProcessingError carrying the underlying error message when the
 *         download or the conversion fails.
 */
async function convertPDFToImages(pdfUrl: string): Promise<string> {
  let pdfPath: string | undefined;
  try {
    pdfPath = await downloadPDF(pdfUrl);
    const outputDir = ensureOutputDirectory();

    // 2480 x 3508 px is an A4 page at 300 DPI, matching the density setting.
    const options = {
      density: 300,
      saveFilename: "page",
      savePath: outputDir,
      format: "png",
      width: 2480,
      height: 3508
    };

    const convert = fromPath(pdfPath, options);
    // -1 asks pdf2pic to convert every page; the result has one entry per page.
    const pages = await convert.bulk(-1, { responseType: "image" });
    console.log(`Successfully converted ${pages.length} pages to images`);
    console.log(`Images are saved in: ${outputDir}`);

    return outputDir;
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
    console.error('Error converting PDF to images:', errorMessage);
    throw new ProcessingError(errorMessage);
  } finally {
    if (pdfPath !== undefined) {
      try {
        fs.unlinkSync(pdfPath);
      } catch {
        // Best-effort cleanup: a leftover temp file must not mask the real outcome.
      }
    }
  }
}
98
+
99
+ // Function to convert image to base64
100
/**
 * Reads a file and returns its contents encoded as base64.
 *
 * The file is read with the 'base64' encoding directly, which avoids copying
 * the bytes into a second Buffer before encoding.
 *
 * @param filePath Path of the file to read.
 * @returns Base64 text of the file's bytes (empty string for an empty file).
 * @throws ProcessingError when the file cannot be read.
 */
function imageToBase64(filePath: string): string {
  try {
    return fs.readFileSync(filePath, 'base64');
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
    throw new ProcessingError(`Error converting image to base64: ${errorMessage}`);
  }
}
109
+
110
+ // Function to parse Mistral's response into ExamDetail format
111
/**
 * Extracts exam details from the model's free-text reply.
 *
 * The reply is searched for a JSON object: first the span from the first `{`
 * to the last `}`, then, if that does not parse, the span up to the first
 * `}` (the expected object is flat, so this recovers replies with stray
 * braces after the JSON). Each expected field is copied only when it is a
 * non-blank string; anything missing or of another type becomes "Unknown",
 * so the result always has the full ExamDetail shape.
 *
 * @param analysis Raw text returned by the model.
 * @returns The parsed details, with "Unknown" for every field that could not
 *          be recovered. Never throws.
 */
function parseExamDetail(analysis: string): ExamDetail {
  const fallback: ExamDetail = {
    "course-name": "Unknown",
    "slot": "Unknown",
    "course-code": "Unknown",
    "exam-type": "Unknown"
  };

  const candidates = [
    analysis.match(/\{[\s\S]*\}/)?.[0],
    analysis.match(/\{[\s\S]*?\}/)?.[0]
  ];

  let failure: unknown = new Error("Could not parse exam details from response");

  for (const candidate of candidates) {
    if (candidate === undefined) {
      continue;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch (error: unknown) {
      failure = error;
      continue;
    }

    if (typeof parsed !== 'object' || parsed === null) {
      continue;
    }

    const fields = parsed as Record<string, unknown>;
    const readField = (key: keyof ExamDetail): string => {
      const value = fields[key];
      return typeof value === 'string' && value.trim() !== '' ? value : fallback[key];
    };

    return {
      "course-name": readField("course-name"),
      "slot": readField("slot"),
      "course-code": readField("course-code"),
      "exam-type": readField("exam-type")
    };
  }

  console.error("Error parsing exam details:", failure);
  return fallback;
}
131
+
132
+ // Function to analyze images using Mistral AI
133
/**
 * Sends the first page image in `imageDirectory` to Mistral and returns the
 * exam details extracted from it.
 *
 * Only one image is analyzed. The `.png` files are sorted with numeric-aware
 * ordering before the first is taken, because directory listing order is not
 * guaranteed and plain string order would put a "10" before a "2".
 *
 * NOTE(review): images left in the directory by an earlier run are
 * indistinguishable from fresh ones here - confirm whether the directory
 * should be emptied before conversion.
 *
 * @param imageDirectory Directory containing the rendered page images.
 * @returns A single-element array. On any failure (missing API key, no image,
 *          API error, unusable reply) the element is a placeholder whose
 *          imageName is 'error.png', whose fields are all "Error", and whose
 *          rawAnalysis describes the failure. This function does not throw.
 */
async function analyzeImages(imageDirectory: string): Promise<AnalysisResult[]> {
  try {
    const apiKey = process.env.MISTRAL_API_KEY;
    if (!apiKey) {
      throw new ProcessingError("MISTRAL_API_KEY environment variable not set");
    }
    const client = new Mistral({ apiKey });

    const pngFiles = (await readdir(imageDirectory))
      .filter(file => file.toLowerCase().endsWith('.png'))
      .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }));
    const firstImageFile = pngFiles[0];

    if (!firstImageFile) {
      throw new ProcessingError('No .png file found in the directory');
    }

    const imagePath = join(imageDirectory, firstImageFile);
    const imageBase64 = imageToBase64(imagePath);
    const dataUrl = `data:image/png;base64,${imageBase64}`;

    const prompt = `Please analyze this exam paper image and extract the following details in JSON format:
- course-name: The full name of the course (3-4 words, no numbers or special characters)
- slot: One of A1|A2|B1|B2|C1|C2|D1|D2|E1|E2|F1|F2|G1|G2
- course-code: The course code (format: department letters + numbers)
- exam-type: One of "Final Assessment Test|Continuous Assessment Test - 1|Continuous Assessment Test - 2"

Provide the response in this exact format:
{
    "course-name": "...",
    "slot": "...",
    "course-code": "...",
    "exam-type": "..."
}`;

    const chatResponse = await client.chat.complete({
      model: "pixtral-12b",
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: prompt },
            { type: "image_url", imageUrl: dataUrl }
          ]
        }
      ]
    }) as MistralResponse;

    // The cast above is not checked at runtime, so confirm the reply really
    // is non-empty text before handing it to the parser.
    const content: unknown = chatResponse?.choices?.[0]?.message?.content;
    if (typeof content !== 'string' || content === '') {
      throw new ProcessingError('Invalid response from Mistral API');
    }

    return [{
      imageName: firstImageFile,
      examDetail: parseExamDetail(content),
      rawAnalysis: content
    }];
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
    console.error('Error analyzing images:', errorMessage);

    // Deliberate best-effort: report the failure as a placeholder result so
    // the caller can still write its results file.
    return [{
      imageName: 'error.png',
      examDetail: {
        "course-name": "Error",
        "slot": "Error",
        "course-code": "Error",
        "exam-type": "Error"
      },
      rawAnalysis: `Error analyzing image: ${errorMessage}`
    }];
  }
}
211
+
212
+ // Main function to process everything
213
/**
 * Runs the whole pipeline for one PDF: render it to images, analyze the
 * result, write the findings to `analysis_results.json` inside the image
 * directory, and echo them to the console.
 *
 * @param pdfUrl Location of the PDF to process.
 * @throws ProcessingError carrying the underlying error message when any
 *         step fails.
 */
async function processPDFAndAnalyze(pdfUrl: string): Promise<void> {
  try {
    // Step 1: render the PDF; the returned directory holds the page images.
    const imageDir = await convertPDFToImages(pdfUrl);

    // Step 2: run the image analysis over that directory.
    const results = await analyzeImages(imageDir);

    // Step 3: persist the results next to the images as pretty-printed JSON.
    const resultsPath = join(imageDir, 'analysis_results.json');
    const serialized = JSON.stringify(results, null, 2);
    fs.writeFileSync(resultsPath, serialized);

    console.log('Analysis completed. Results saved to:', resultsPath);

    // Step 4: print each result for quick inspection.
    for (const { imageName, examDetail, rawAnalysis } of results) {
      console.log(`\nAnalysis for ${imageName}:`);
      console.log('Exam Details:', examDetail);
      console.log('Raw Analysis:', rawAnalysis);
    }
  } catch (error: unknown) {
    let errorMessage = 'Unknown error occurred';
    if (error instanceof Error) {
      errorMessage = error.message;
    }
    console.error('Error in processing:', errorMessage);
    throw new ProcessingError(errorMessage);
  }
}
243
+
244
+ // Example usage - Replace with your PDF URL
245
const pdfUrl = 'https://res.cloudinary.com/dtorpaj1c/image/upload/v1731668830/papers/mykcs2yxaman61kx0jvj.pdf';

// Run the complete process. On failure, set a non-zero exit code so shells
// and CI can detect it; exitCode (rather than process.exit) lets pending
// output flush before the process ends.
processPDFAndAnalyze(pdfUrl)
  .then(() => console.log('Complete processing finished'))
  .catch((error: unknown) => {
    console.error('Processing failed:', error);
    process.exitCode = 1;
  });
0 commit comments