Skip to content

Commit 7ac427e

Browse files
Merge pull request #81 from guth01/staging
Mistral__test
2 parents ddb825d + 53f10ec commit 7ac427e

File tree

2 files changed

+266
-0
lines changed

2 files changed

+266
-0
lines changed

src/app/api/getTags/route.ts

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
import { fromPath } from 'pdf2pic';
2+
import { existsSync, mkdirSync } from 'fs';
3+
import { join, dirname } from 'path';
4+
import axios from 'axios';
5+
import * as tmp from 'tmp';
6+
import * as fs from 'fs';
7+
import { Mistral } from "@mistralai/mistralai";
8+
import { readdir } from 'fs/promises';
9+
import dotenv from 'dotenv';
10+
11+
// Load environment variables
12+
dotenv.config();
13+
14+
// Type definitions

/** Exam metadata fields requested from the model for a question-paper image. */
type ExamDetail = {
  /** Full course name. */
  'course-name': string;
  /** Timetable slot label. */
  'slot': string;
  /** Course code. */
  'course-code': string;
  /** Exam type label. */
  'exam-type': string;
};

/** Outcome of analysing a single page image. */
type AnalysisResult = {
  /** File name of the analysed image. */
  imageName: string;
  /** Structured details parsed out of the model reply. */
  examDetail: ExamDetail;
  /** The model reply text exactly as it was received. */
  rawAnalysis: string;
};

/** Minimal shape of the chat-completion response that this file reads. */
type MistralResponse = {
  choices: Array<{
    message: {
      content: string;
    };
  }>;
};
35+
36+
/**
 * Error raised by the PDF download / conversion / analysis steps in this file.
 *
 * Carries only a message; `name` is set to `'ProcessingError'` so that logs
 * and `String(error)` identify the failure source.
 */
class ProcessingError extends Error {
  constructor(message: string) {
    super(message);
    // Replace the inherited default ("Error") with this class's own label.
    this.name = 'ProcessingError';
  }
}
43+
44+
/**
 * Makes sure `<cwd>/output_images` exists as a directory and returns its path.
 *
 * @returns Absolute path of the output directory.
 * @throws If the directory cannot be created, including when a non-directory
 *         entry already occupies that path.
 */
function ensureOutputDirectory(): string {
  const outputDir = join(process.cwd(), 'output_images');
  // With `recursive: true`, mkdirSync is a no-op for an existing directory, so
  // no separate existence check is needed. Dropping the check removes the
  // check-then-create race, and a plain file sitting at this path now fails
  // here instead of being returned as if it were a directory.
  mkdirSync(outputDir, { recursive: true });
  return outputDir;
}
52+
53+
/**
 * Downloads the PDF at `url` into a newly created temporary `.pdf` file.
 *
 * The caller owns the returned file and is responsible for deleting it.
 *
 * @param url - Location of the PDF to fetch with an HTTP GET.
 * @returns Path of the temporary file holding the downloaded bytes.
 * @throws ProcessingError if the temp file cannot be created, the request
 *         fails, or the bytes cannot be written.
 */
async function downloadPDF(url: string): Promise<string> {
  // Set once the temp file exists so the failure path can remove it again.
  let removeTempFile: (() => void) | undefined;
  try {
    // Only the path is used below, so let `tmp` close its descriptor right
    // away instead of keeping an open fd around for the process lifetime.
    const tmpFile = tmp.fileSync({ postfix: '.pdf', discardDescriptor: true });
    removeTempFile = tmpFile.removeCallback;

    const response = await axios({
      method: 'GET',
      url,
      responseType: 'arraybuffer',
    });

    // Non-blocking write: this runs inside an async function.
    await fs.promises.writeFile(tmpFile.name, response.data);
    return tmpFile.name;
  } catch (error: unknown) {
    if (removeTempFile) {
      try {
        // Do not leave an empty or partial file behind when the download fails.
        removeTempFile();
      } catch {
        // Best-effort cleanup; the original failure is what gets reported.
      }
    }
    const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
    console.error('Error downloading PDF:', errorMessage);
    throw new ProcessingError(errorMessage);
  }
}
70+
71+
/**
 * Downloads the PDF at `pdfUrl` and renders its pages as PNG files in the
 * output directory.
 *
 * The temporary PDF is deleted once conversion finishes, whether it
 * succeeded or failed.
 *
 * @param pdfUrl - Location of the PDF to download and convert.
 * @returns Path of the directory the page images were written to.
 * @throws ProcessingError if the download or the conversion fails.
 */
async function convertPDFToImages(pdfUrl: string): Promise<string> {
  // Remembered outside the try so `finally` can delete the temp file.
  let pdfPath: string | undefined;
  try {
    pdfPath = await downloadPDF(pdfUrl);
    const outputDir = ensureOutputDirectory();

    const options = {
      density: 300,
      saveFilename: "page",
      savePath: outputDir,
      format: "png",
      width: 2480,
      height: 3508
    };

    const convert = fromPath(pdfPath, options);
    // `bulk` resolves to one entry per converted page.
    const pages = await convert.bulk(-1, { responseType: "image" });
    console.log(`Successfully converted ${pages.length} pages to images`);
    console.log(`Images are saved in: ${outputDir}`);

    return outputDir;
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
    console.error('Error converting PDF to images:', errorMessage);
    throw new ProcessingError(errorMessage);
  } finally {
    if (pdfPath !== undefined) {
      try {
        // The PDF is only an intermediate artefact; without this every call
        // leaves one more file in the temp directory.
        fs.unlinkSync(pdfPath);
      } catch {
        // Best-effort cleanup; never mask the conversion outcome.
      }
    }
  }
}
98+
99+
/**
 * Reads a file from disk and returns its contents base64-encoded.
 *
 * @param filePath - Path of the file to encode.
 * @returns The file bytes as a base64 string.
 * @throws ProcessingError if the file cannot be read.
 */
function imageToBase64(filePath: string): string {
  try {
    return fs.readFileSync(filePath).toString('base64');
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : 'Unknown error occurred';
    throw new ProcessingError(`Error converting image to base64: ${reason}`);
  }
}
109+
110+
/** Keys every `ExamDetail` carries. */
const EXAM_DETAIL_KEYS = ['course-name', 'slot', 'course-code', 'exam-type'] as const;

/** Placeholder used for any field that could not be recovered. */
const UNKNOWN_FIELD = 'Unknown';

/**
 * Returns every top-level, brace-balanced `{ ... }` span found in `text`.
 *
 * Braces inside double-quoted strings of a candidate are ignored, so a value
 * such as `"Sets {and} Maps"` does not end the span early.
 */
function extractJsonObjectCandidates(text: string): string[] {
  const candidates: string[] = [];
  let depth = 0;
  let start = -1;
  let inString = false;
  let escaped = false;

  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    // Quotes in the surrounding prose are not JSON strings; only track
    // string state while inside a candidate object.
    if (ch === '"' && depth > 0) {
      inString = true;
    } else if (ch === '{') {
      if (depth === 0) {
        start = i;
      }
      depth++;
    } else if (ch === '}' && depth > 0) {
      depth--;
      if (depth === 0) {
        candidates.push(text.slice(start, i + 1));
      }
    }
  }
  return candidates;
}

/**
 * Extracts the exam details from the model's free-form reply.
 *
 * The reply may wrap the JSON in prose or code fences and may contain other
 * braces. Each brace-balanced span is tried in order, and the first one that
 * parses to an object holding at least one expected string field is used.
 * Fields that are missing or not strings become `"Unknown"`.
 *
 * @param analysis - Raw text returned by the model.
 * @returns The parsed details, or an all-`"Unknown"` record when no usable
 *          JSON object is found. This function never throws.
 */
function parseExamDetail(analysis: string): ExamDetail {
  for (const candidate of extractJsonObjectCandidates(analysis)) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Not JSON (for example "{slot}" in prose); try the next span.
      continue;
    }
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      continue;
    }

    const record = parsed as Record<string, unknown>;
    // Skip unrelated objects that carry none of the expected fields.
    if (!EXAM_DETAIL_KEYS.some(key => typeof record[key] === 'string')) {
      continue;
    }

    const field = (key: (typeof EXAM_DETAIL_KEYS)[number]): string => {
      const value = record[key];
      return typeof value === 'string' ? value : UNKNOWN_FIELD;
    };
    return {
      'course-name': field('course-name'),
      'slot': field('slot'),
      'course-code': field('course-code'),
      'exam-type': field('exam-type'),
    };
  }

  console.error(
    "Error parsing exam details:",
    new Error("Could not parse exam details from response")
  );
  return {
    'course-name': UNKNOWN_FIELD,
    'slot': UNKNOWN_FIELD,
    'course-code': UNKNOWN_FIELD,
    'exam-type': UNKNOWN_FIELD,
  };
}
131+
132+
/** Instruction sent to the model together with the page image. */
const EXAM_DETAIL_PROMPT = `Please analyze this exam paper image and extract the following details in JSON format:
- course-name: The full name of the course (3-4 words, no numbers or special characters)
- slot: One of A1|A2|B1|B2|C1|C2|D1|D2|E1|E2|F1|F2|G1|G2
- course-code: The course code (format: department letters + numbers)
- exam-type: One of "Final Assessment Test|Continuous Assessment Test - 1|Continuous Assessment Test - 2"

Provide the response in this exact format:
{
"course-name": "...",
"slot": "...",
"course-code": "...",
"exam-type": "..."
}`;

/**
 * Picks the `.png` file that sorts first under numeric-aware ordering.
 *
 * Directory listings come back in no guaranteed order, and a plain string
 * sort would place `page.10.png` before `page.2.png`.
 *
 * @returns The chosen file name, or `undefined` when there is no `.png` file.
 */
function pickFirstPageImage(fileNames: readonly string[]): string | undefined {
  return fileNames
    .filter(name => name.toLowerCase().endsWith('.png'))
    .sort((a, b) => a.localeCompare(b, undefined, { numeric: true, sensitivity: 'base' }))[0];
}

/**
 * Sends the first page image found in `imageDirectory` to the Mistral vision
 * model and parses the exam details out of its reply.
 *
 * Only one image is analysed, so the returned array always has exactly one
 * entry. Failures are not thrown: they are logged and reported as a single
 * placeholder result whose fields are `"Error"` and whose `rawAnalysis`
 * carries the error message.
 *
 * @param imageDirectory - Directory containing the rendered page PNGs.
 * @returns A one-element array holding either the analysis or the error
 *          placeholder.
 */
async function analyzeImages(imageDirectory: string): Promise<AnalysisResult[]> {
  try {
    const apiKey = process.env.MISTRAL_API_KEY;
    if (!apiKey) {
      throw new ProcessingError("MISTRAL_API_KEY environment variable not set");
    }
    const client = new Mistral({ apiKey });

    const firstImageFile = pickFirstPageImage(await readdir(imageDirectory));
    if (!firstImageFile) {
      throw new ProcessingError('No .png file found in the directory');
    }

    const imageBase64 = imageToBase64(join(imageDirectory, firstImageFile));
    const dataUrl = `data:image/png;base64,${imageBase64}`;

    const chatResponse = await client.chat.complete({
      model: "pixtral-12b",
      messages: [
        {
          role: "user",
          content: [
            { type: "text", text: EXAM_DETAIL_PROMPT },
            { type: "image_url", imageUrl: dataUrl }
          ]
        }
      ]
    }) as MistralResponse;

    // The cast above is not checked at runtime, so verify that the content
    // really is a non-empty string before handing it to the parser.
    const rawAnalysis: unknown = chatResponse?.choices?.[0]?.message?.content;
    if (typeof rawAnalysis !== 'string' || rawAnalysis.length === 0) {
      throw new ProcessingError('Invalid response from Mistral API');
    }

    return [{
      imageName: firstImageFile,
      examDetail: parseExamDetail(rawAnalysis),
      rawAnalysis
    }];
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
    console.error('Error analyzing images:', errorMessage);

    // Deliberate best-effort: callers always receive one result to persist.
    return [{
      imageName: 'error.png',
      examDetail: {
        "course-name": "Error",
        "slot": "Error",
        "course-code": "Error",
        "exam-type": "Error"
      },
      rawAnalysis: `Error analyzing image: ${errorMessage}`
    }];
  }
}
211+
212+
/**
 * Runs the whole pipeline for one PDF: convert it to page images, analyse
 * them, write the results to `analysis_results.json` inside the image
 * directory, and echo them to the console.
 *
 * @param pdfUrl - Location of the PDF to process.
 * @throws ProcessingError if any step throws.
 */
async function processPDFAndAnalyze(pdfUrl: string): Promise<void> {
  try {
    // Step 1: render the PDF pages; this yields the directory they live in.
    const imagesDir = await convertPDFToImages(pdfUrl);

    // Step 2: run the image analysis over that directory.
    const findings = await analyzeImages(imagesDir);

    // Step 3: persist the findings next to the images as pretty-printed JSON.
    const resultsPath = join(imagesDir, 'analysis_results.json');
    const serialized = JSON.stringify(findings, null, 2);
    fs.writeFileSync(resultsPath, serialized);

    console.log('Analysis completed. Results saved to:', resultsPath);

    // Step 4: echo each finding to the console.
    for (const finding of findings) {
      console.log(`\nAnalysis for ${finding.imageName}:`);
      console.log('Exam Details:', finding.examDetail);
      console.log('Raw Analysis:', finding.rawAnalysis);
    }
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
    console.error('Error in processing:', errorMessage);
    throw new ProcessingError(errorMessage);
  }
}
243+
244+
// Example usage - replace with your own PDF URL.
// NOTE(review): this driver runs as soon as the module is loaded, and the file
// sits at src/app/api/getTags/route.ts without exporting a request handler.
// Confirm it is meant to be a standalone test script rather than an API route.
const pdfUrl = 'https://res.cloudinary.com/dtorpaj1c/image/upload/v1731668830/papers/mykcs2yxaman61kx0jvj.pdf';

// Kick off the full pipeline and report the outcome.
void (async (): Promise<void> => {
  try {
    await processPDFAndAnalyze(pdfUrl);
    console.log('Complete processing finished');
  } catch (error) {
    console.error('Processing failed:', error);
  }
})();

src/app/api/getTags/types.d.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/**
 * Minimal ambient typings for the parts of `pdf2pic` used by the getTags code.
 */
declare module 'pdf2pic' {
  /** Conversion settings passed to `fromPath`. */
  interface Options {
    density?: number;
    saveFilename?: string;
    savePath?: string;
    format?: string;
    width?: number;
    height?: number;
  }

  /**
   * One converted page as returned by `bulk`.
   *
   * NOTE(review): field names follow pdf2pic's image-file response; confirm
   * them against the installed pdf2pic version. Every field is optional so
   * callers have to check before use.
   */
  interface ConvertedPage {
    name?: string;
    size?: string;
    fileSize?: number;
    path?: string;
    page?: number;
  }

  /** Converter returned by `fromPath`. */
  interface ConvertResponse {
    /**
     * Converts several pages in one call.
     *
     * @param pageNumber - A page number or a list of page numbers. The caller
     *        in this project passes `-1`, presumably meaning every page —
     *        TODO confirm against the pdf2pic docs.
     * @param options - Selects the response representation, for example
     *        `{ responseType: "image" }`.
     */
    bulk: (
      pageNumber: number | number[],
      options?: { responseType: string }
    ) => Promise<ConvertedPage[]>;
  }

  /** Creates a converter for the PDF stored at `path`. */
  export function fromPath(path: string, options?: Options): ConvertResponse;
}

0 commit comments

Comments
 (0)