Skip to content

Commit 127d8e1

Browse files
committed
Enhanced question validation system
- Add semantic answer validation to check whether the correct answer is supported by the passage
- Add answer consistency validation to ensure the correct answer matches the relevant text
- Add explanation consistency validation to verify that explanations match answers
- Add question-answer coherence validation to ensure questions are answerable
- Add comprehensive debugging and logging for validation failures
- Improve retry logic with better error reporting
- Extend quality metrics to track all validation aspects
- Fix linting error for an unnecessary condition check

This addresses answer validation issues where correct answers were marked as incorrect due to poor AI generation or flawed validation logic.
1 parent 048462f commit 127d8e1

File tree

2 files changed

+190
-1
lines changed

2 files changed

+190
-1
lines changed

app/lib/ai/exercise-generator.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@ import { generateExercisePrompt } from 'app/lib/ai/prompts/exercise-prompt';
22
import { ExerciseContent, ExerciseContentSchema, type QuizData } from 'app/domain/schemas';
33
import { type ExerciseGenerationParams } from 'app/domain/ai';
44
import { callGoogleAI, AIResponseProcessingError } from 'app/lib/ai/google-ai-api';
5-
import { validateQuestionQuality, logQualityMetrics } from 'app/lib/ai/question-validator';
5+
import {
6+
validateQuestionQuality,
7+
logQualityMetrics,
8+
debugValidationFailure,
9+
} from 'app/lib/ai/question-validator';
610

711
export { AIResponseProcessingError };
812

@@ -90,12 +94,16 @@ export const generateAndValidateExercise = async (
9094
console.warn(
9195
`[AI:generateAndValidateExercise] Quality validation failed on final attempt ${attempt + 1}: ${qualityValidation.reason}`
9296
);
97+
// Debug the validation failure
98+
debugValidationFailure(exerciseContent, qualityValidation.reason);
9399
}
94100
return exerciseContent;
95101
} else {
96102
console.warn(
97103
`[AI:generateAndValidateExercise] Quality validation failed on attempt ${attempt + 1}: ${qualityValidation.reason}. Retrying...`
98104
);
105+
// Debug the validation failure for retry attempts
106+
debugValidationFailure(exerciseContent, qualityValidation.reason);
99107
lastError = new AIResponseProcessingError(
100108
`Quality validation failed: ${qualityValidation.reason}`,
101109
qualityValidation.metrics

app/lib/ai/question-validator.ts

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ export interface QualityMetrics {
99
hasCorrectAnswer: boolean;
1010
allExplanationsPresent: boolean;
1111
relevantTextInParagraph: boolean;
12+
// Enhanced validation metrics
13+
answerConsistency: boolean;
14+
explanationConsistency: boolean;
15+
questionAnswerCoherence: boolean;
16+
semanticAnswerValidation: boolean;
1217
// Add more metrics as needed
1318
}
1419

@@ -22,10 +27,107 @@ const MIN_QUESTION_LENGTH = 10;
2227
const MIN_EXPLANATION_LENGTH = 20;
2328
const MIN_OPTION_LENGTH = 3;
2429

30+
/**
 * Normalizes text for loose comparison: lowercases it, strips punctuation and
 * collapses every run of whitespace into a single space.
 *
 * Letters, combining marks and digits of any script are preserved. The ASCII-only
 * `\w` class would drop accented and non-Latin characters (e.g. "café" -> "caf"),
 * which corrupts comparisons for non-English passages.
 *
 * @param text - Raw text to normalize.
 * @returns The lowercased, punctuation-free, single-spaced and trimmed text.
 */
const normalizeText = (text: string): string => {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '') // Remove punctuation, keep letters/digits of any script
    .replace(/\s+/g, ' ') // Normalize whitespace
    .trim();
};
38+
39+
/**
 * Checks whether `text` contains the information expressed by `keyInfo`.
 *
 * Both strings are normalized with `normalizeText` first. The check passes when
 * the normalized key phrase appears verbatim in the text, or when at least 60%
 * of its keywords (words of three or more characters) loosely match a word of
 * the text.
 *
 * @param text - Text searched for supporting evidence.
 * @param keyInfo - Phrase that should be supported by `text`.
 * @returns `true` when the phrase is found or enough keywords match. A key
 *   phrase that has no keyword of three or more characters passes vacuously.
 */
const containsKeyInfo = (text: string, keyInfo: string): boolean => {
  // Words shorter than this are ignored as evidence in either direction.
  const minWordLength = 3;

  const normalizedText = normalizeText(text);
  const normalizedKeyInfo = normalizeText(keyInfo);

  // Check for an exact phrase match first.
  if (normalizedText.includes(normalizedKeyInfo)) return true;

  // Fall back to simple keyword matching.
  const keyWords = normalizedKeyInfo.split(' ').filter((word) => word.length >= minWordLength);
  const textWords = normalizedText.split(' ');

  const matchingWords = keyWords.filter((keyWord) =>
    textWords.some(
      (textWord) =>
        // The text word contains the keyword (e.g. "walking" contains "walk").
        textWord.includes(keyWord) ||
        // The keyword contains the text word (e.g. "cats" contains "cat"). Very short
        // or empty text words ("", "a", "in") are substrings of almost any keyword,
        // so they must not count as a match.
        (textWord.length >= minWordLength && keyWord.includes(textWord))
    )
  );

  return matchingWords.length >= Math.ceil(keyWords.length * 0.6); // 60% word match
};
57+
58+
// Enhanced validation functions
59+
/**
 * Verifies that the text of the option marked as correct is supported by the
 * exercise's relevant text.
 *
 * @param exercise - Exercise whose correct answer is checked.
 * @returns `false` when no correct answer is set or its option text is missing
 *   or empty; otherwise the result of `containsKeyInfo` on the relevant text.
 */
const validateAnswerConsistency = (exercise: ExerciseContent): boolean => {
  const answerKey = exercise.correctAnswer;

  // Resolve the option text only when an answer key is actually present.
  const answerText = answerKey
    ? exercise.options[answerKey as keyof typeof exercise.options]
    : undefined;

  return answerText ? containsKeyInfo(exercise.relevantText, answerText) : false;
};
71+
72+
/**
 * Verifies that the explanation given for the correct option refers to the text
 * of that option.
 *
 * @param exercise - Exercise whose correct-answer explanation is checked.
 * @returns `false` when no correct answer is set, or when the matching option
 *   text or explanation is missing or empty; otherwise the result of
 *   `containsKeyInfo` applied to the explanation and the option text.
 */
const validateExplanationConsistency = (exercise: ExerciseContent): boolean => {
  const { correctAnswer, allExplanations, options } = exercise;

  if (!correctAnswer) return false;

  const correctAnswerText = options[correctAnswer as keyof typeof options];
  const correctExplanation = allExplanations[correctAnswer as keyof typeof allExplanations];

  // The answer key may not exist in either map (or may map to an empty string).
  // Without this guard an undefined value would reach the text helpers and throw,
  // and an empty one could never be meaningfully compared.
  if (!correctAnswerText || !correctExplanation) return false;

  // Check if the correct explanation mentions the correct answer
  return containsKeyInfo(correctExplanation, correctAnswerText);
};
83+
84+
/**
 * Heuristically checks that the question can be answered from the passage by
 * requiring at least half of the question's keywords to loosely match a word of
 * the paragraph or the relevant text.
 *
 * Keywords are the normalized question words longer than three characters that
 * are not listed interrogatives.
 *
 * @param exercise - Exercise whose question is compared with its passage.
 * @returns `true` when at least 50% of the question keywords are matched. A
 *   question without any keyword passes vacuously.
 */
const validateQuestionAnswerCoherence = (exercise: ExerciseContent): boolean => {
  const { question, paragraph, relevantText } = exercise;

  // Minimum length a passage word needs before it may match as part of a keyword.
  const minStemLength = 3;

  // Check if the question can be answered from the paragraph
  const questionKeywords = normalizeText(question)
    .split(' ')
    .filter(
      (word) => word.length > 3 && !['what', 'where', 'when', 'who', 'how', 'why'].includes(word)
    );

  const passageWords = [
    ...normalizeText(paragraph).split(' '),
    ...normalizeText(relevantText).split(' '),
  ];

  // Check if question keywords appear in the paragraph or relevant text
  const matchingKeywords = questionKeywords.filter((keyword) =>
    passageWords.some(
      (word) =>
        // The passage word contains the keyword (e.g. "walks" contains "walk").
        word.includes(keyword) ||
        // The keyword contains the passage word (e.g. "cats" contains "cat"). Empty
        // or very short passage words ("", "a", "in") are substrings of almost any
        // keyword and would make this check pass for unrelated questions.
        (word.length >= minStemLength && keyword.includes(word))
    )
  );

  return matchingKeywords.length >= Math.ceil(questionKeywords.length * 0.5); // 50% keyword match
};
106+
107+
/**
 * Verifies that the text of the option marked as correct is supported by the
 * passage as a whole, i.e. the paragraph followed by the relevant text.
 *
 * @param exercise - Exercise whose correct answer is checked.
 * @returns `false` when no correct answer is set or its option text is missing
 *   or empty; otherwise the result of `containsKeyInfo` on the combined passage.
 */
const validateSemanticAnswerValidation = (exercise: ExerciseContent): boolean => {
  const answerKey = exercise.correctAnswer;
  if (!answerKey) return false;

  const answerText = exercise.options[answerKey as keyof typeof exercise.options];
  if (!answerText) return false;

  // Search the paragraph and the highlighted excerpt together.
  const passage = exercise.paragraph + ' ' + exercise.relevantText;
  return containsKeyInfo(passage, answerText);
};
120+
25121
export const validateQuestionQuality = (
26122
exercise: ExerciseContent,
27123
_level: CEFRLevel
28124
): ValidationResult => {
125+
// Calculate enhanced validation metrics
126+
const answerConsistency = validateAnswerConsistency(exercise);
127+
const explanationConsistency = validateExplanationConsistency(exercise);
128+
const questionAnswerCoherence = validateQuestionAnswerCoherence(exercise);
129+
const semanticAnswerValidation = validateSemanticAnswerValidation(exercise);
130+
29131
const metrics: QualityMetrics = {
30132
questionLength: exercise.question.length,
31133
explanationLength: Object.values(exercise.allExplanations).reduce(
@@ -40,6 +142,11 @@ export const validateQuestionQuality = (
40142
Object.keys(exercise.allExplanations).length === 4 &&
41143
Object.values(exercise.allExplanations).every((exp) => exp.length > 0),
42144
relevantTextInParagraph: exercise.paragraph.includes(exercise.relevantText),
145+
// Enhanced validation metrics
146+
answerConsistency,
147+
explanationConsistency,
148+
questionAnswerCoherence,
149+
semanticAnswerValidation,
43150
};
44151

45152
if (!metrics.hasCorrectAnswer) {
@@ -70,6 +177,39 @@ export const validateQuestionQuality = (
70177
return { isValid: false, reason: 'One or more options are too short.', metrics };
71178
}
72179

180+
// Enhanced validation checks
181+
if (!metrics.answerConsistency) {
182+
return {
183+
isValid: false,
184+
reason: 'Correct answer is not supported by the relevant text.',
185+
metrics,
186+
};
187+
}
188+
189+
if (!metrics.explanationConsistency) {
190+
return {
191+
isValid: false,
192+
reason: 'Correct explanation does not match the correct answer.',
193+
metrics,
194+
};
195+
}
196+
197+
if (!metrics.questionAnswerCoherence) {
198+
return {
199+
isValid: false,
200+
reason: 'Question cannot be answered from the provided passage.',
201+
metrics,
202+
};
203+
}
204+
205+
if (!metrics.semanticAnswerValidation) {
206+
return {
207+
isValid: false,
208+
reason: 'Correct answer is not semantically supported by the passage.',
209+
metrics,
210+
};
211+
}
212+
73213
// Add more sophisticated checks based on CEFR level if needed
74214
// For example, checking vocabulary complexity, sentence structure complexity, etc.
75215
// This would likely require external NLP libraries or more advanced AI calls.
@@ -86,4 +226,45 @@ export const logQualityMetrics = (metrics: QualityMetrics, level: CEFRLevel, lan
86226
console.log(` Has Correct Answer: ${metrics.hasCorrectAnswer}`);
87227
console.log(` All Explanations Present: ${metrics.allExplanationsPresent}`);
88228
console.log(` Relevant Text In Paragraph: ${metrics.relevantTextInParagraph}`);
229+
console.log(` Answer Consistency: ${metrics.answerConsistency}`);
230+
console.log(` Explanation Consistency: ${metrics.explanationConsistency}`);
231+
console.log(` Question-Answer Coherence: ${metrics.questionAnswerCoherence}`);
232+
console.log(` Semantic Answer Validation: ${metrics.semanticAnswerValidation}`);
233+
};
234+
235+
/**
 * Logs diagnostic details about an exercise that failed quality validation.
 *
 * Writes to `console.error`: the failure reason, the key exercise fields (with
 * the paragraph shortened to a preview), the outcome of each enhanced check, and
 * - when the correct answer resolves to an option - the answer text next to the
 * relevant text.
 *
 * @param exercise - Exercise that failed validation.
 * @param reason - Failure reason reported by the validator.
 */
export const debugValidationFailure = (exercise: ExerciseContent, reason: string): void => {
  // Maximum number of paragraph characters included in the log output.
  const paragraphPreviewLength = 200;

  // Only mark the preview with an ellipsis when text was actually cut off.
  const paragraphPreview =
    exercise.paragraph.length > paragraphPreviewLength
      ? exercise.paragraph.substring(0, paragraphPreviewLength) + '...'
      : exercise.paragraph;

  console.error(`[ValidationDebug] Question validation failed: ${reason}`);
  console.error(`[ValidationDebug] Exercise data:`, {
    question: exercise.question,
    correctAnswer: exercise.correctAnswer,
    options: exercise.options,
    relevantText: exercise.relevantText,
    paragraph: paragraphPreview,
  });

  // Re-run each enhanced check so the log shows which ones fail.
  const answerConsistency = validateAnswerConsistency(exercise);
  const explanationConsistency = validateExplanationConsistency(exercise);
  const questionAnswerCoherence = validateQuestionAnswerCoherence(exercise);
  const semanticAnswerValidation = validateSemanticAnswerValidation(exercise);

  console.error(`[ValidationDebug] Individual checks:`, {
    answerConsistency,
    explanationConsistency,
    questionAnswerCoherence,
    semanticAnswerValidation,
  });

  // Show the declared correct answer next to the text that should support it.
  const correctAnswerText = exercise.correctAnswer
    ? exercise.options[exercise.correctAnswer as keyof typeof exercise.options]
    : undefined;

  if (correctAnswerText) {
    console.error(`[ValidationDebug] Correct answer text: "${correctAnswerText}"`);
    console.error(`[ValidationDebug] Relevant text: "${exercise.relevantText}"`);
    console.error(`[ValidationDebug] Answer supported by relevant text: ${answerConsistency}`);
  }
};

0 commit comments

Comments
 (0)