Skip to content

Commit 33600d0

Browse files
Marfuen and tofikwest authored
feat(api): add AI-powered question extraction and update dependencies (#1858)
Co-authored-by: Tofik Hasanov <[email protected]>
1 parent 1ce0560 commit 33600d0

File tree

7 files changed

+942
-85
lines changed

7 files changed

+942
-85
lines changed

apps/api/.env.example

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,6 @@ UPSTASH_VECTOR_REST_TOKEN=
2323
# Trigger
2424
TRIGGER_SECRET_KEY=
2525

26-
OPENAI_API_KEY=
26+
OPENAI_API_KEY=
27+
ANTHROPIC_API_KEY=
28+
GROQ_API_KEY=

apps/api/package.json

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,28 +4,30 @@
44
"version": "0.0.1",
55
"author": "",
66
"dependencies": {
7+
"@ai-sdk/groq": "^2.0.32",
78
"@ai-sdk/openai": "^2.0.65",
8-
"@prisma/instrumentation": "^6.13.0",
9-
"@trigger.dev/build": "4.0.6",
10-
"@trigger.dev/sdk": "4.0.6",
11-
"@upstash/vector": "^1.2.2",
129
"@aws-sdk/client-s3": "^3.859.0",
13-
"ai": "^5.0.60",
1410
"@aws-sdk/s3-request-presigner": "^3.859.0",
1511
"@nestjs/common": "^11.0.1",
1612
"@nestjs/config": "^4.0.2",
1713
"@nestjs/core": "^11.0.1",
1814
"@nestjs/platform-express": "^11.1.5",
1915
"@nestjs/swagger": "^11.2.0",
2016
"@prisma/client": "^6.13.0",
17+
"@prisma/instrumentation": "^6.13.0",
2118
"@react-email/components": "^0.0.41",
19+
"@trigger.dev/build": "4.0.6",
20+
"@trigger.dev/sdk": "4.0.6",
2221
"@trycompai/db": "^1.3.17",
22+
"@upstash/vector": "^1.2.2",
23+
"ai": "^5.0.60",
2324
"archiver": "^7.0.1",
2425
"axios": "^1.12.2",
2526
"better-auth": "^1.3.27",
2627
"class-transformer": "^0.5.1",
2728
"class-validator": "^0.14.2",
2829
"dotenv": "^17.2.3",
30+
"exceljs": "^4.4.0",
2931
"jose": "^6.0.12",
3032
"jspdf": "^3.0.3",
3133
"mammoth": "^1.8.0",
@@ -47,13 +49,15 @@
4749
"@nestjs/cli": "^11.0.0",
4850
"@nestjs/schematics": "^11.0.0",
4951
"@nestjs/testing": "^11.0.1",
52+
"@types/adm-zip": "^0.5.7",
5053
"@types/archiver": "^6.0.3",
5154
"@types/express": "^5.0.0",
5255
"@types/jest": "^30.0.0",
5356
"@types/multer": "^1.4.12",
5457
"@types/node": "^24.0.3",
5558
"@types/supertest": "^6.0.2",
5659
"@types/swagger-ui-express": "^4.1.8",
60+
"adm-zip": "^0.5.16",
5761
"eslint": "^9.18.0",
5862
"eslint-config-prettier": "^10.0.1",
5963
"eslint-plugin-prettier": "^5.2.2",

apps/api/src/questionnaire/questionnaire.service.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import { db, Prisma } from '@db';
1616
import { syncManualAnswerToVector, syncOrganizationEmbeddings } from '@/vector-store/lib';
1717

1818
// Import shared utilities
19-
import { extractContentFromFile, type ContentExtractionLogger } from './utils/content-extractor';
19+
import { extractContentFromFile, extractQuestionsWithAI, type ContentExtractionLogger } from './utils/content-extractor';
2020
import { parseQuestionsAndAnswers, type QuestionAnswer as ParsedQA } from './utils/question-parser';
2121
import { generateExportFile, type ExportFormat } from './utils/export-generator';
2222
import {
@@ -149,12 +149,12 @@ export class QuestionnaireService {
149149
source: dto.source || 'internal',
150150
});
151151

152-
const content = await extractContentFromFile(
152+
// Use AI-powered extraction (faster, handles all file formats)
153+
const questionsAndAnswers = await extractQuestionsWithAI(
153154
dto.fileData,
154155
dto.fileType,
155156
this.contentLogger,
156157
);
157-
const questionsAndAnswers = await parseQuestionsAndAnswers(content, this.contentLogger);
158158

159159
const questionnaireId = await persistQuestionnaireResult(
160160
{

apps/api/src/questionnaire/utils/constants.ts

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,38 @@ CRITICAL RULES:
3030
8. Always write in first person plural (we, our, us) as if speaking on behalf of the organization.
3131
9. Keep answers to 1-3 sentences maximum unless the question explicitly requires more detail.`;
3232

33-
export const QUESTION_PARSING_SYSTEM_PROMPT = `You parse vendor questionnaires. Return only genuine question text paired with its answer.
34-
- Ignore table headers, column labels, metadata rows, or placeholder words such as "Question", "Company Name", "Department", "Assessment Date", "Name of Assessor".
35-
- A valid question is a meaningful sentence (usually ends with '?' or starts with interrogatives like What/Why/How/When/Where/Is/Are/Do/Does/Can/Will/Should).
36-
- Do not fabricate answers; if no answer is provided, set answer to null.
37-
- Keep the original question wording but trim whitespace.`;
33+
export const QUESTION_PARSING_SYSTEM_PROMPT = `You parse vendor questionnaires from Excel spreadsheets. Extract all question-answer pairs.
34+
35+
Input format:
36+
- Each row has columns like: [Question] ID | [Question Text] actual question | [Response] answer | [Comment] notes
37+
- Or: [Question] actual question text | [Response] answer
38+
- Lines starting with [COLUMNS:] show the column headers - use these to understand the structure
39+
- The actual question TEXT is usually the longest cell, contains "?" or starts with What/How/Do/Is/Are/etc.
40+
41+
CRITICAL: The "Question" column might contain just an ID (like "SQ14.3") - look for the column with the ACTUAL question text!
42+
43+
Rules:
44+
1. Find the column containing actual question sentences (not just IDs/numbers)
45+
2. The question text is usually a full sentence ending with "?" or starting with interrogative words
46+
3. Extract the FULL question text, not the question ID
47+
4. Match each question to its Response/Answer from the same row
48+
5. If Response is empty, set answer to null
49+
6. Skip section headers (e.g., "Information Security Program", "General Information")
50+
7. Skip metadata rows (Company Name, Date, etc.)`;
3851

3952
// Vision extraction prompt for PDFs and images
40-
export const VISION_EXTRACTION_PROMPT = `Extract all text and identify question-answer pairs. Look for columns/sections labeled "Question", "Q", "Answer", "A". Match questions (ending with "?" or starting with What/How/Why/When/Is/Can/Do) to nearby answers. Preserve order. Return only Question → Answer pairs.`;
53+
export const VISION_EXTRACTION_PROMPT = `Extract all text and identify question-answer pairs from this document.
54+
55+
Look for:
56+
- Tables with columns labeled "Question", "Q", "Response", "Answer", "A", "Comment"
57+
- Questions ending with "?" or starting with What/How/Why/When/Where/Is/Are/Do/Does/Can/Will/Should
58+
- Numbered questions like "06. Do you have...", "1) What is...", "Q1: How do..."
59+
- Section headers (e.g., "Information Security Program", "General Information") that group questions
60+
61+
For each question found:
62+
- Extract the full question text (may omit number prefix)
63+
- Match it to any nearby response/answer in the same row or adjacent cell
64+
- If no answer is provided, note it as empty
65+
66+
Preserve the order of questions as they appear. Return Question → Answer pairs in a structured format.`;
4167

0 commit comments

Comments
 (0)