Skip to content

Commit 6cb94da

Browse files
authored
issue-7449: Offer suggestions to partially matching OCR scans (#7760)
1 parent bf483b4 commit 6cb94da

File tree

13 files changed

+878
-6
lines changed

13 files changed

+878
-6
lines changed

Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
### ✨ New features and improvements
88
- Enable test results downloads through the API (#7754)
9+
- Provide student suggestions for scans with partial OCR matches (#7760)
910

1011
### 🐛 Bug fixes
1112

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// OCR Suggestions Styles
2+
// Used in assign_scans view for displaying OCR match data and student suggestions
3+
4+
@import 'constants';
5+
6+
// Panel that wraps the OCR result and the suggestion list in the assign_scans view.
// Its height is capped; taller content scrolls vertically and horizontal overflow is hidden.
.ocr-suggestions-container {
7+
margin: 1em 0;
8+
padding: 1em;
9+
background-color: $background-support;
10+
border: 1px solid $gridline;
11+
border-radius: var(--radius);
12+
max-height: 400px;
13+
overflow-y: auto;
14+
overflow-x: hidden;
15+
16+
// Inline highlight for <code> elements in the panel (the parsed OCR value is rendered in one).
code {
17+
background-color: $disabled-area;
18+
padding: 0.2em 0.4em;
19+
border-radius: 3px;
20+
}
21+
22+
// Muted, italic message used when no similar students were found.
.no-match {
23+
color: $disabled-text;
24+
font-style: italic;
25+
}
26+
}
27+
28+
// List of suggested students rendered inside the OCR suggestions panel.
.ocr-suggestions-list {
29+
margin-top: 0.5em;
// NOTE(review): presumably resets positioning inherited from the `ui-menu` class the
// list element also carries, so it stays in normal flow — confirm.
30+
position: static;
31+
32+
// Hover highlight for a suggestion row (each row is a clickable <div> in a .ui-menu-item).
.ui-menu-item div:hover {
33+
background-color: $primary-three;
34+
color: $sharp-line;
35+
}
36+
37+
// Secondary line of a suggestion: the "<id number> | <user name>" text.
.student-info {
38+
font-size: 1.1em;
39+
color: $sharp-line;
40+
font-weight: 500;
41+
}
42+
}

app/assets/stylesheets/common/core.scss

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
@import 'markus';
22
@import 'constants';
33
@import 'navigation';
4+
@import 'ocr_suggestions';
45
@import '../../../../node_modules/@fortawesome/fontawesome-svg-core/styles';
56

67
#about_dialog {

app/controllers/groups_controller.rb

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,18 @@ def assign_scans
147147
if num_valid == num_total
148148
flash_message(:success, t('exam_templates.assign_scans.done'))
149149
end
150+
# Get OCR match data and suggestions if available
151+
ocr_match = OcrMatchService.get_match(next_grouping.id)
152+
ocr_suggestions = ocr_match ? OcrMatchService.get_suggestions(next_grouping.id, current_course.id) : []
153+
150154
@data = {
151155
group_name: next_grouping.group.group_name,
152156
grouping_id: next_grouping.id,
153157
students: names,
154158
num_total: num_total,
155-
num_valid: num_valid
159+
num_valid: num_valid,
160+
ocr_match: ocr_match,
161+
ocr_suggestions: format_ocr_suggestions(ocr_suggestions)
156162
}
157163
next_file = next_grouping.current_submission_used.submission_files.find_by(filename: 'COVER.pdf')
158164
if next_file.nil?
@@ -221,6 +227,8 @@ def assign_student_and_next
221227
end
222228
StudentMembership
223229
.find_or_create_by(role: student, grouping: @grouping, membership_status: StudentMembership::STATUSES[:inviter])
230+
# Clear OCR match data after successful assignment
231+
OcrMatchService.clear_match(@grouping.id)
224232
end
225233
next_grouping
226234
end
@@ -243,20 +251,28 @@ def next_grouping
243251
if num_valid == num_total
244252
flash_message(:success, t('exam_templates.assign_scans.done'))
245253
end
254+
# Get OCR match data and suggestions if available
255+
ocr_match = OcrMatchService.get_match(next_grouping.id)
256+
ocr_suggestions = ocr_match ? OcrMatchService.get_suggestions(next_grouping.id, current_course.id) : []
257+
246258
if !@grouping.nil? && next_grouping.id == @grouping.id
247259
render json: {
248260
grouping_id: next_grouping.id,
249261
students: names,
250262
num_total: num_total,
251-
num_valid: num_valid
263+
num_valid: num_valid,
264+
ocr_match: ocr_match,
265+
ocr_suggestions: format_ocr_suggestions(ocr_suggestions)
252266
}
253267
else
254268
data = {
255269
group_name: next_grouping.group.group_name,
256270
grouping_id: next_grouping.id,
257271
students: names,
258272
num_total: num_total,
259-
num_valid: num_valid
273+
num_valid: num_valid,
274+
ocr_match: ocr_match,
275+
ocr_suggestions: format_ocr_suggestions(ocr_suggestions)
260276
}
261277
next_file = next_grouping.current_submission_used.submission_files.find_by(filename: 'COVER.pdf')
262278
unless next_file.nil?
@@ -728,6 +744,19 @@ def remove_member(membership, grouping)
728744
grouping.reload
729745
end
730746

747+
# Format OCR suggestions for the JSON response.
#
# ocr_suggestions - Array of hashes shaped like
#                   { student: <object with #id and #user>, similarity: <Numeric in 0..1> };
#                   nil is treated as an empty list.
#
# Returns an Array of hashes with the student's id, user_name, id_number and
# display_name, plus the similarity expressed as a percentage rounded to one decimal.
def format_ocr_suggestions(ocr_suggestions)
  Array(ocr_suggestions).map do |suggestion|
    student = suggestion[:student]
    # Look the user up once instead of once per attribute.
    user = student.user
    {
      id: student.id,
      user_name: user.user_name,
      id_number: user.id_number,
      display_name: user.display_name,
      similarity: (suggestion[:similarity] * 100).round(1)
    }
  end
end
759+
731760
# This override is necessary because this controller is acting as a controller
732761
# for both groups and groupings.
733762
#

app/javascript/application_webpack.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ import {refreshOrLogout} from "./common/refresh_or_logout";
105105
window.refreshOrLogout = refreshOrLogout;
106106
import {ModalMarkus} from "./common/modals";
107107
window.ModalMarkus = ModalMarkus;
108+
import {updateOcrSuggestions} from "./common/ocr_suggestions";
109+
window.updateOcrSuggestions = updateOcrSuggestions;
108110
import {makeDashboard} from "./Components/dashboard";
109111
window.makeDashboard = makeDashboard;
110112
import {makeAssignmentSummary} from "./Components/assignment_summary";
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/**
 * OCR Suggestions Module
 * Handles display and interaction with OCR match data and student suggestions
 * Used in the assign_scans view for exam template processing
 */

/**
 * Re-render the #ocr_suggestions panel for the grouping currently shown.
 *
 * The panel is emptied on every call. It is hidden when there is no OCR match;
 * otherwise it shows the parsed OCR value followed by either a "no similar
 * students" message or a clickable list of suggested students. Clicking a
 * suggestion fills the #student_id and #names inputs and focuses #names.
 *
 * @param {Object|null} ocrMatch - OCR match data; reads `parsed_value` and `field_type`.
 * @param {Array<Object>|null} [suggestions=[]] - Suggested students; each entry is read for
 *   `id`, `user_name`, `id_number`, `display_name` and `similarity` (a percentage).
 * @returns {undefined}
 */
export function updateOcrSuggestions(ocrMatch, suggestions = []) {
  const container = $("#ocr_suggestions");
  container.empty();

  if (!ocrMatch) {
    container.hide();
    return;
  }

  container.show();

  // Default parameters only cover `undefined`; a JSON `null` must not reach `.length`.
  const students = suggestions || [];

  // internationalization
  const noId = I18n.t("exam_templates.assign_scans.no_id");
  const idNumber = I18n.t("activerecord.attributes.user.id_number");
  const userName = I18n.t("activerecord.attributes.user.user_name");
  const suggestedStudents = I18n.t("exam_templates.assign_scans.suggested_students");
  const noSimilarStudents = I18n.t("exam_templates.assign_scans.no_similar_students");

  // Display the parsed OCR value
  const fieldType = ocrMatch.field_type === "id_number" ? idNumber : userName;
  const ocrDetected = I18n.t("exam_templates.assign_scans.ocr_detected", {field_type: fieldType});

  // Every string is inserted with .text() so neither translations nor server
  // data are ever parsed as HTML.
  const ocrDisplay = $("<p></p>");
  ocrDisplay.append($("<strong></strong>").text(ocrDetected));
  ocrDisplay.append($("<code></code>").text(ocrMatch.parsed_value));
  container.append(ocrDisplay);

  if (students.length === 0) {
    container.append($('<p class="no-match"></p>').text(noSimilarStudents));
    return;
  }

  // Display suggestions if available
  container.append($("<p></p>").append($("<strong></strong>").text(suggestedStudents)));
  const list = $('<ul class="ui-menu ocr-suggestions-list"></ul>');

  students.forEach(function (suggestion) {
    const item = $('<li class="ui-menu-item"></li>');
    const content = $("<div></div>");

    const nameElem = $("<strong></strong>").text(suggestion.display_name);
    const infoText = `${suggestion.id_number || noId} | ${suggestion.user_name}`;
    const infoElem = $('<span class="student-info"></span>').text(infoText);

    content.append(nameElem);
    content.append(document.createTextNode(` (${suggestion.similarity}%)`));
    content.append("<br>");
    content.append(infoElem);

    // Selecting a suggestion pre-fills the assignment form; the user still submits it.
    content.on("click", function () {
      $("#student_id").val(suggestion.id);
      $("#names").val(suggestion.display_name);
      $("#names").focus();
    });

    item.append(content);
    list.append(item);
  });

  container.append(list);
}

app/jobs/auto_match_job.rb

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,16 @@ def perform(groupings, exam_template)
5050
next unless status.success? && parsed.length == 1
5151

5252
student = match_student(parsed[0], exam_template)
53+
54+
# Store OCR match result in Redis for later suggestions
55+
OcrMatchService.store_match(
56+
grouping.id,
57+
parsed[0],
58+
exam_template.cover_fields,
59+
matched: !student.nil?,
60+
student_id: student&.id
61+
)
62+
5363
unless student.nil?
5464
StudentMembership.find_or_create_by(role: student,
5565
grouping: grouping,
@@ -67,7 +77,7 @@ def perform(groupings, exam_template)
6777
def match_student(parsed, exam_template)
6878
case exam_template.cover_fields
6979
when 'id_number'
70-
Student.joins(:user).find_by('user.id_number': parsed)
80+
Student.joins(:user).find_by('users.id_number': parsed)
7181
when 'user_name'
7282
Student.joins(:user).find_by(User.arel_table[:user_name].matches(parsed))
7383
end

app/lib/ocr_match_service.rb

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# Service for storing and retrieving OCR match data from Redis.
# Used to persist OCR parsing results for scanned exam assignments,
# enabling suggestions for manual student assignment.
class OcrMatchService
  # Time-to-live for OCR match data in Redis (30 days)
  TTL = 30.days.to_i

  class << self
    # Store an OCR match result in Redis.
    #
    # grouping_id  - id of the grouping the scan belongs to
    # parsed_value - text extracted from the scan by OCR
    # field_type   - which field was parsed ('id_number' or 'user_name')
    # matched      - whether a student was matched automatically
    # student_id   - id of the automatically matched student, if any
    #
    # The per-grouping record expires after TTL. Groupings that were not
    # auto-matched are also tracked in a shared "unmatched" set.
    def store_match(grouping_id, parsed_value, field_type, matched: false, student_id: nil)
      data = {
        parsed_value: parsed_value,
        field_type: field_type,
        timestamp: Time.current.iso8601,
        matched: matched,
        matched_student_id: student_id
      }

      redis.setex(match_key(grouping_id), TTL, data.to_json)

      if matched
        # The grouping may have been stored as unmatched by an earlier run;
        # remove it so the unmatched set never lists an auto-matched grouping.
        redis.srem(unmatched_set_key, grouping_id)
      else
        # Add to unmatched set if not auto-matched
        redis.sadd(unmatched_set_key, grouping_id)
        redis.expire(unmatched_set_key, TTL)
      end
    end

    # Retrieve stored OCR match data for a grouping.
    #
    # Returns the stored hash with symbolized keys, or nil when nothing is stored.
    def get_match(grouping_id)
      data = redis.get(match_key(grouping_id))
      data ? JSON.parse(data, symbolize_names: true) : nil
    end

    # Get student suggestions based on stored OCR match using fuzzy matching.
    # Only considers students not already assigned to a grouping for this assignment.
    #
    # grouping_id - id of the grouping whose stored OCR match is used
    # course_id   - id of the course whose students are candidates
    # threshold   - minimum similarity (0..1) a student must reach (default 80%)
    # limit       - maximum number of suggestions returned (default 5)
    #
    # Returns an Array of { student:, similarity: } hashes, highest similarity
    # first; empty when no OCR match is stored for the grouping.
    def get_suggestions(grouping_id, course_id, threshold: 0.8, limit: 5)
      match_data = get_match(grouping_id)
      return [] if match_data.nil?

      grouping = Grouping.find(grouping_id)
      assignment = grouping.assignment
      course = Course.find(course_id)

      # Get students who are not assigned to any grouping for this assignment
      assigned_student_ids = assignment.groupings
                                       .joins(:student_memberships)
                                       .pluck('memberships.role_id')
      students = course.students.includes(:user).where.not(id: assigned_student_ids)

      # Calculate similarity scores for each student
      suggestions = students.filter_map do |student|
        value_to_match = student_match_value(student, match_data[:field_type])
        next if value_to_match.blank?

        similarity = string_similarity(match_data[:parsed_value], value_to_match)
        next if similarity < threshold

        { student: student, similarity: similarity }
      end

      # Return top matches by similarity (highest first)
      suggestions.max_by(limit) { |s| s[:similarity] }
    end

    # Clear OCR match data after manual assignment: removes the per-grouping
    # record and the grouping's entry in the unmatched set.
    def clear_match(grouping_id)
      redis.del(match_key(grouping_id))
      redis.srem(unmatched_set_key, grouping_id)
    end

    private

    # Redis key holding the OCR match record for one grouping.
    def match_key(grouping_id)
      "ocr_matches:grouping:#{grouping_id}"
    end

    # Redis key of the set of grouping ids that were not auto-matched.
    def unmatched_set_key
      'ocr_matches:unmatched'
    end

    # Redis connection shared with Resque, namespaced by the Rails root path.
    def redis
      Redis::Namespace.new(Rails.root.to_s, redis: Resque.redis)
    end

    # Get the value to match against based on field type.
    # Returns nil for any field type other than 'id_number' or 'user_name'.
    def student_match_value(student, field_type)
      case field_type
      when 'id_number' then student.user.id_number
      when 'user_name' then student.user.user_name
      end
    end

    # Calculate similarity between two strings using Levenshtein distance.
    # Comparison ignores case and surrounding whitespace.
    # Returns a score between 0 and 1, where 1 is identical.
    def string_similarity(str1, str2)
      return 1.0 if str1 == str2
      return 0.0 if str1.blank? || str2.blank?

      # Normalize strings for case-insensitive comparison
      s1 = str1.to_s.downcase.strip
      s2 = str2.to_s.downcase.strip
      return 1.0 if s1 == s2

      # Levenshtein distance from the did_you_mean library
      distance = DidYouMean::Levenshtein.distance(s1, s2)
      max_length = [s1.length, s2.length].max
      return 0.0 if max_length.zero?

      1.0 - (distance.to_f / max_length)
    end
  end
end

app/views/groups/assign_scans.html.erb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@
4949
$("#assign_student").find("#skip").val("1").prop('checked', false);
5050
let current_group = $("#grouping_id").val();
5151
update_bar(data.num_valid, data.num_total);
52+
53+
// Update OCR suggestions
54+
updateOcrSuggestions(data.ocr_match, data.ocr_suggestions);
55+
5256
// Anytime we advance to another assignment
5357
if (data.grouping_id !== current_group) {
5458
$("#grouping_id").val(data.grouping_id);
@@ -134,6 +138,12 @@
134138
<button type="submit"><%= t('save') %></button>
135139
</p>
136140
</form>
141+
142+
<!-- OCR Suggestions Section -->
143+
<div id="ocr_suggestions" class="ocr-suggestions-container" style="display: none;">
144+
<!-- Will be populated by JavaScript -->
145+
</div>
146+
137147
<h3>
138148
<%= Group.human_attribute_name(:student_memberships) %>
139149
</h3>

config/locales/views/exam_templates/en.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,13 @@ en:
1717
done: All groups have been successfully assigned students
1818
help: Assign students to scanned exam groups based on printed student names.
1919
no_cover_page: This submission does not have a cover page.
20+
no_id: No ID
21+
no_similar_students: No similar students found. Please assign manually.
2022
not_all_submissions_collected: Not all submissions have been collected.
23+
ocr_detected: 'OCR Detected %{field_type}:'
2124
skip_group: Skip group
2225
student_not_found: Student with name %{name} does not exist.
26+
suggested_students: 'Suggested Students:'
2327
title: Assign Scans
2428
back_to_exam_templates_page: Back to Exam Templates page
2529
create:

0 commit comments

Comments
 (0)