|
# Service for storing and retrieving OCR match data from Redis.
# Used to persist OCR parsing results for scanned exam assignments,
# enabling suggestions for manual student assignment.
#
# Per-grouping results are stored as JSON under "ocr_matches:grouping:<id>";
# ids of groupings that were not auto-matched are additionally tracked in the
# "ocr_matches:unmatched" set.
class OcrMatchService
  # Time-to-live, in seconds, for OCR match data in Redis (30 days)
  TTL = 30.days.to_i

  class << self
    # Store an OCR match result in Redis, replacing any previous result for
    # the same grouping.
    #
    # grouping_id  - id of the grouping the scan belongs to
    # parsed_value - the value OCR read from the scan
    # field_type   - which student field the value represents
    #                ('id_number' or 'user_name' are the types understood by
    #                get_suggestions)
    # matched      - whether the value was automatically matched to a student
    # student_id   - id of the matched student, if any
    def store_match(grouping_id, parsed_value, field_type, matched: false, student_id: nil)
      payload = {
        parsed_value: parsed_value,
        field_type: field_type,
        timestamp: Time.current.iso8601,
        matched: matched,
        matched_student_id: student_id
      }

      # Reuse one namespaced handle for all commands issued by this call.
      store = redis
      store.setex(match_key(grouping_id), TTL, payload.to_json)

      if matched
        # The grouping may have been recorded as unmatched by an earlier call;
        # drop it so the unmatched set does not hold stale ids.
        store.srem(unmatched_set_key, grouping_id)
      else
        store.sadd(unmatched_set_key, grouping_id)
        # Refresh the expiry of the whole set each time a member is added.
        store.expire(unmatched_set_key, TTL)
      end
    end

    # Retrieve stored OCR match data for a grouping.
    # Returns the stored hash with symbolized keys, or nil if nothing is stored.
    def get_match(grouping_id)
      raw = redis.get(match_key(grouping_id))
      raw && JSON.parse(raw, symbolize_names: true)
    end

    # Get student suggestions based on stored OCR match using fuzzy matching.
    # Only considers students not already assigned to a grouping for this assignment.
    #
    # threshold - minimum similarity (0..1) a student must reach (default 0.8)
    # limit     - maximum number of suggestions returned (default 5)
    #
    # Returns an array of { student:, similarity: } hashes ordered by
    # similarity, highest first; empty if no OCR data is stored for the grouping.
    def get_suggestions(grouping_id, course_id, threshold: 0.8, limit: 5)
      match_data = get_match(grouping_id)
      return [] if match_data.nil?

      assignment = Grouping.find(grouping_id).assignment
      course = Course.find(course_id)

      # Get students who are not assigned to any grouping for this assignment
      assigned_student_ids = assignment.groupings
                                       .joins(:student_memberships)
                                       .pluck('memberships.role_id')
      candidates = course.students.includes(:user).where.not(id: assigned_student_ids)

      # These do not change per student, so read them once.
      parsed_value = match_data[:parsed_value]
      field_type = match_data[:field_type]

      # Calculate similarity scores for each student, dropping weak matches
      scored = candidates.filter_map do |student|
        candidate_value = student_match_value(student, field_type)
        next if candidate_value.blank?

        similarity = string_similarity(parsed_value, candidate_value)
        next if similarity < threshold

        { student: student, similarity: similarity }
      end

      # Return top matches by similarity (highest first)
      scored.max_by(limit) { |entry| entry[:similarity] }
    end

    # Clear OCR match data after manual assignment: removes both the stored
    # result and the grouping's entry in the unmatched set.
    def clear_match(grouping_id)
      store = redis
      store.del(match_key(grouping_id))
      store.srem(unmatched_set_key, grouping_id)
    end

    private

    # Redis key holding the JSON match data for one grouping
    def match_key(grouping_id)
      "ocr_matches:grouping:#{grouping_id}"
    end

    # Redis key of the set holding ids of groupings that were not auto-matched
    def unmatched_set_key
      'ocr_matches:unmatched'
    end

    # Redis handle on top of Resque's connection, namespaced by the
    # application root path. Built on every call rather than cached.
    def redis
      Redis::Namespace.new(Rails.root.to_s, redis: Resque.redis)
    end

    # Get the value to match against based on field type.
    # Returns nil for any field type other than 'id_number' or 'user_name'.
    def student_match_value(student, field_type)
      case field_type
      when 'id_number' then student.user.id_number
      when 'user_name' then student.user.user_name
      end
    end

    # Calculate similarity between two strings using Levenshtein distance.
    # Comparison ignores case and surrounding whitespace.
    # Returns a score between 0 and 1, where 1 is identical; a blank value on
    # either side never matches and scores 0.
    def string_similarity(str1, str2)
      return 0.0 if str1.blank? || str2.blank?

      # Normalize strings for case-insensitive comparison
      s1 = str1.to_s.downcase.strip
      s2 = str2.to_s.downcase.strip
      return 1.0 if s1 == s2

      # Both strings are non-empty here, so the longer length is never zero.
      distance = DidYouMean::Levenshtein.distance(s1, s2)
      1.0 - (distance.to_f / [s1.length, s2.length].max)
    end
  end
end
0 commit comments