|
1 | 1 | # frozen_string_literal: true |
2 | 2 |
|
3 | | -namespace :detect_exercise_anomalies do |
4 | | - # uncomment for debug logging: |
5 | | - # logger = Logger.new($stdout) |
6 | | - # logger.level = Logger::DEBUG |
7 | | - # Rails.logger = logger |
| 3 | +class DetectExerciseAnomaliesJob < ApplicationJob |
| 4 | + include TimeHelper |
8 | 5 |
|
9 | | - # rubocop:disable Lint/ConstantDefinitionInBlock, Style/MutableConstant |
10 | 6 | # These factors determine if an exercise is an anomaly, given the average working time (avg): |
11 | 7 | # (avg * MIN_TIME_FACTOR) <= working_time <= (avg * MAX_TIME_FACTOR) |
12 | 8 | MIN_TIME_FACTOR = 0.1 |
|
18 | 14 | # Determines the margin below which contributor working times are considered data errors (e.g. copy/paste solutions) |
19 | 15 | MIN_CONTRIBUTOR_WORKING_TIME = 0.0 |
20 | 16 |
|
21 | | - # Cache exercise working times, because queries are expensive and values do not change between collections |
22 | | - WORKING_TIME_CACHE = {} |
23 | | - AVERAGE_WORKING_TIME_CACHE = {} |
24 | | - # rubocop:enable Lint/ConstantDefinitionInBlock, Style/MutableConstant |
25 | | - |
26 | | - task :with_at_least, %i[number_of_exercises number_of_contributors] => :environment do |_task, args| |
27 | | - include TimeHelper |
| 17 | + def perform(number_of_exercises:, number_of_contributors:) |
| 18 | + # Cache exercise working times, because queries are expensive and values do not change between collections |
| 19 | + @working_time_cache = {} |
| 20 | + @average_working_time_cache = {} |
28 | 21 |
|
29 | 22 | # Set intervalstyle to iso_8601 to avoid problems with time parsing. |
30 | 23 | ApplicationRecord.connection.exec_query("SET intervalstyle = 'iso_8601';") |
31 | 24 |
|
32 | | - number_of_exercises = args[:number_of_exercises] |
33 | | - number_of_contributors = args[:number_of_contributors] |
34 | | - |
35 | 25 | log "Searching for exercise collections with at least #{number_of_exercises} exercises and #{number_of_contributors} contributors." |
36 | 26 | # Get all exercise collections that have at least the specified amount of exercises and at least the specified |
37 | 27 | # number of contributors AND are flagged for anomaly detection |
|
52 | 42 | end |
53 | 43 |
|
# Writes a debug-level line to the Rails logger.
#
# The line is built as: one tab per indent level, then the prefix, then the message.
# The string is assembled inside the logger block, so it is only built when the
# logger actually evaluates the block.
#
# @param message [#to_s] text to log
# @param indent_level [Integer] number of leading tab characters
# @param prefix [#to_s] text placed between the indentation and the message
def log(message = '', indent_level = 0, prefix = '')
  Rails.logger.debug do
    indentation = "\t" * indent_level
    "#{indentation}#{prefix}#{message}"
  end
end
57 | 47 |
|
58 | 48 | def get_collections(number_of_exercises, number_of_solutions) |
@@ -92,23 +82,23 @@ def find_anomalies(collection) |
92 | 82 | end |
93 | 83 |
|
# Returns the average working time of the exercise as converted by `time_to_f`,
# memoized per exercise id in @average_working_time_cache.
#
# Presence in the cache is checked with `key?`, so a cached nil/false result is
# returned as-is and the conversion is not repeated.
#
# @param exercise [#id, #average_working_time]
# @return the cached or freshly converted value
def get_average_working_time(exercise)
  cache = @average_working_time_cache
  exercise_id = exercise.id
  return cache[exercise_id] if cache.key?(exercise_id)

  cache[exercise_id] = time_to_f(exercise.average_working_time)
end
101 | 91 |
|
# Returns the working-time results of all contributors of the exercise as one flat hash
# keyed by [contributor_type, contributor_id], memoized per exercise id in @working_time_cache.
#
# On a cache miss, the statistics are loaded via `exercise.retrieve_working_time_statistics`
# and the nested `exercise.working_time_statistics`
# ({contributor_type => {contributor_id => result}}) is flattened.
#
# The hash is built in a single pass. An exercise without any statistics therefore yields
# an empty hash; the previous flat_map + inject(:merge) returned (and cached) nil in that case.
#
# @param exercise [#id, #retrieve_working_time_statistics, #working_time_statistics]
# @return [Hash{Array => Object}] result per [contributor_type, contributor_id]
def get_contributor_working_times(exercise)
  unless @working_time_cache.key?(exercise.id)
    exercise.retrieve_working_time_statistics
    @working_time_cache[exercise.id] =
      exercise.working_time_statistics.each_with_object({}) do |(contributor_type, results_by_id), times|
        results_by_id.each do |contributor_id, result|
          times[[contributor_type, contributor_id]] = result
        end
      end
  end
  @working_time_cache[exercise.id]
end
113 | 103 |
|
114 | 104 | def notify_collection_author(collection, anomalies) |
|
0 commit comments