1
1
# frozen_string_literal: true
2
2
3
- # Parallelize process executation.
3
+ # A class to parallelize process executation.
4
+ # This is a utility class to execute tasks in parallel, with our own forking implementation
5
+ # that passes through logs and reliably handles errors. If parallel processing has been disabled,
6
+ # this instead executes the tasks serially, but provides the same API as the parallel tasks.
4
7
5
8
require 'stringio'
6
9
7
10
module OctocatalogDiff
8
11
module Util
9
- # This is a utility class to execute tasks in parallel, using the 'parallel' gem.
10
- # If parallel processing has been disabled, this instead executes the tasks serially,
11
- # but provides the same API as the parallel tasks.
12
12
class Parallel
13
- # This class is called for a task that didn't complete.
13
+ # This exception is called for a task that didn't complete.
14
14
class IncompleteTask < RuntimeError ; end
15
15
16
+ # --------------------------------------
16
17
# This class represents a parallel task. It requires a method reference, which will be executed with
17
18
# any supplied arguments. It can optionally take a text description and a validator function.
19
+ # --------------------------------------
18
20
class Task
19
21
attr_reader :description
20
22
attr_accessor :args
@@ -37,10 +39,12 @@ def validate(result, logger = Logger.new(StringIO.new))
37
39
end
38
40
end
39
41
42
+ # --------------------------------------
40
43
# This class represents the result from a parallel task. The status is set to true (success), false (error),
41
44
# or nil (task was killed before it could complete). The exception (for failure) and output object (for success)
42
45
# are readable attributes. The validity of the results, determined by executing the 'validate' method of the Task,
43
46
# is available to be set and fetched.
47
+ # --------------------------------------
44
48
class Result
45
49
attr_reader :output , :args
46
50
attr_accessor :status , :exception
@@ -53,65 +57,66 @@ def initialize(opts = {})
53
57
end
54
58
end
55
59
60
+ # --------------------------------------
61
+ # Static methods in the class
62
+ # --------------------------------------
63
+
56
64
# Entry point for parallel processing. By default this will perform parallel processing,
57
65
# but it will also accept an option to do serial processing instead.
58
66
# @param task_array [Array<Parallel::Task>] Tasks to run
59
67
# @param logger [Logger] Optional logger object
60
68
# @param parallelized [Boolean] True for parallel processing, false for serial processing
69
+ # @param raise_exception [Boolean] True to raise exception immediately if one occurs; false to return exception in results
61
70
# @return [Array<Parallel::Result>] Parallel results (same order as tasks)
62
- #
63
- # Note: Parallelization throws intermittent errors under travis CI, so it will be disabled by
64
- # default for integration tests.
65
71
def self . run_tasks ( task_array , logger = nil , parallelized = true , raise_exception = false )
66
72
# Create a throwaway logger object if one is not given
67
73
logger ||= Logger . new ( StringIO . new )
68
74
69
- # Validate input - we need an array. If the array is empty then return an empty array right away.
75
+ # Validate input - we need an array of OctocatalogDiff::Util::Parallel::Task. If the array is empty then
76
+ # return an empty array right away.
70
77
raise ArgumentError , "run_tasks() argument must be array, not #{ task_array . class } " unless task_array . is_a? ( Array )
71
78
return [ ] if task_array . empty?
72
79
73
- # Make sure each element in the array is a OctocatalogDiff::Util::Parallel::Task
74
- task_array . each do | x |
75
- next if x . is_a? ( OctocatalogDiff :: Util :: Parallel :: Task )
76
- raise ArgumentError , "Element #{ x . inspect } must be a OctocatalogDiff::Util::Parallel::Task, not a #{ x . class } "
80
+ invalid_inputs = task_array . reject { | task | task . is_a? ( OctocatalogDiff ::Util ::Parallel ::Task ) }
81
+ if invalid_inputs . any?
82
+ ele = invalid_inputs . first
83
+ raise ArgumentError , "Element #{ ele . inspect } must be a OctocatalogDiff::Util::Parallel::Task, not a #{ ele . class } "
77
84
end
78
85
79
- result = task_array . map do |x |
80
- Result . new ( exception : IncompleteTask . new ( 'Killed' ) , args : x . args )
81
- end
86
+ # Initialize the result array. For now all entries in the array indicate that the task was killed.
87
+ # Actual statuses will replace this initial status. If the initial status wasn't replaced, then indeed,
88
+ # the task was killed.
89
+ result = task_array . map { |x | Result . new ( exception : IncompleteTask . new ( 'Killed' ) , args : x . args ) }
82
90
logger . debug "Initialized parallel task result array: size=#{ result . size } "
83
91
84
- exception = if parallelized
85
- run_tasks_parallel ( result , task_array , logger )
86
- else
87
- run_tasks_serial ( result , task_array , logger )
88
- end
89
-
92
+ # Execute as per the requested method (serial or parallel) and handle results.
93
+ exception = parallelized ? run_tasks_parallel ( result , task_array , logger ) : run_tasks_serial ( result , task_array , logger )
90
94
raise exception if exception && raise_exception
91
95
result
92
96
end
93
97
94
- # Use the parallel gem to run each task in the task array. Under the hood this is forking a process for
95
- # each task, and serializing/deserializing the arguments and the outputs.
98
+ # Utility method! Not intended to be called from outside this class.
99
+ # ---
100
+ # Use a forking strategy to run tasks in parallel. Each task in the array is forked in a child
101
+ # process, and when that task completes it writes its result (OctocatalogDiff::Util::Parallel::Result)
102
+ # into a serialized data file. Once children are forked this method waits for their return, deserializing
103
+ # the output from each data file and updating the `result` array with actual results.
96
104
# @param result [Array<OctocatalogDiff::Util::Parallel::Result>] Parallel task results
97
105
# @param task_array [Array<OctocatalogDiff::Util::Parallel::Task>] Tasks to perform
98
106
# @param logger [Logger] Logger
107
+ # @return [Exception] First exception encountered by a child process; returns nil if no exceptions encountered.
99
108
def self . run_tasks_parallel ( result , task_array , logger )
100
109
pidmap = { }
101
110
ipc_tempdir = Dir . mktmpdir
102
111
112
+ # Child process forking
103
113
task_array . each_with_index do |task , index |
104
114
# simplecov doesn't see this because it's forked
105
- # Kernel.exit! avoids at_exit calls possibly set up by rspec tests
106
115
# :nocov:
107
116
this_pid = fork do
108
- begin
109
- task_result = execute_task ( task , logger )
110
- File . open ( File . join ( ipc_tempdir , "#{ Process . pid } .yaml" ) , 'w' ) { |f | f . write Marshal . dump ( task_result ) }
111
- Kernel . exit! 0
112
- rescue
113
- Kernel . exit! 255
114
- end
117
+ task_result = execute_task ( task , logger )
118
+ File . open ( File . join ( ipc_tempdir , "#{ Process . pid } .dat" ) , 'w' ) { |f | f . write Marshal . dump ( task_result ) }
119
+ Kernel . exit! 0 # Kernel.exit! avoids at_exit from parents being triggered by children exiting
115
120
end
116
121
# :nocov:
117
122
@@ -120,16 +125,16 @@ def self.run_tasks_parallel(result, task_array, logger)
120
125
logger . reopen if logger . respond_to? ( :reopen )
121
126
end
122
127
128
+ # Waiting for children and handling results
123
129
while pidmap . any?
124
- # Wait for exits
125
130
this_pid , exit_obj = Process . wait2 ( 0 )
126
131
next unless this_pid && pidmap . key? ( this_pid )
127
132
index = pidmap [ this_pid ] [ :index ]
128
133
exitstatus = exit_obj . exitstatus
129
134
raise "PID=#{ this_pid } exited abnormally: #{ exit_obj . inspect } " if exitstatus . nil?
130
135
raise "PID=#{ this_pid } exited with status #{ exitstatus } " unless exitstatus . zero?
131
136
132
- input = File . read ( File . join ( ipc_tempdir , "#{ this_pid } .yaml " ) )
137
+ input = File . read ( File . join ( ipc_tempdir , "#{ this_pid } .dat " ) )
133
138
result [ index ] = Marshal . load ( input ) # rubocop:disable Security/MarshalLoad
134
139
time_delta = Time . now - pidmap [ this_pid ] [ :start_time ]
135
140
pidmap . delete ( this_pid )
@@ -139,7 +144,11 @@ def self.run_tasks_parallel(result, task_array, logger)
139
144
next if result [ index ] . status
140
145
return result [ index ] . exception
141
146
end
142
- nil
147
+
148
+ logger . debug 'All child processes completed with no exceptions raised'
149
+
150
+ # Cleanup: Kill any child processes that are still running, and clean the temporary directory
151
+ # where data files were stored.
143
152
ensure
144
153
pidmap . each do |pid , _pid_data |
145
154
begin
@@ -161,6 +170,8 @@ def self.run_tasks_parallel(result, task_array, logger)
161
170
end
162
171
end
163
172
173
+ # Utility method! Not intended to be called from outside this class.
174
+ # ---
164
175
# Perform the tasks in serial.
165
176
# @param result [Array<OctocatalogDiff::Util::Parallel::Result>] Parallel task results
166
177
# @param task_array [Array<OctocatalogDiff::Util::Parallel::Task>] Tasks to perform
@@ -177,7 +188,11 @@ def self.run_tasks_serial(result, task_array, logger)
177
188
nil
178
189
end
179
190
180
- # Process a single task.
191
+ # Utility method! Not intended to be called from outside this class.
192
+ # ---
193
+ # Process a single task. Called by run_tasks_parallel / run_tasks_serial.
194
+ # This method will report all exceptions in the OctocatalogDiff::Util::Parallel::Result object
195
+ # itself, and not raise them.
181
196
# @param task [OctocatalogDiff::Util::Parallel::Task] Task object
182
197
# @param logger [Logger] Logger
183
198
# @return [OctocatalogDiff::Util::Parallel::Result] Parallel task result
@@ -188,13 +203,16 @@ def self.execute_task(task, logger)
188
203
result = Result . new ( output : output , status : true , args : task . args )
189
204
rescue => exc
190
205
logger . debug ( "Failed #{ task . description } : #{ exc . class } #{ exc . message } " )
206
+ # Immediately return without running the validation, since this already failed.
191
207
return Result . new ( exception : exc , status : false , args : task . args )
192
208
end
193
209
194
210
begin
195
211
if task . validate ( output , logger )
196
212
logger . debug ( "Success #{ task . description } " )
197
213
else
214
+ # Preferably the validator method raised its own exception. However if it
215
+ # simply returned false, raise our own exception here.
198
216
raise "Failed #{ task . description } validation (unspecified error)"
199
217
end
200
218
rescue => exc
0 commit comments