github
diff --git a/‎.rubocop.yml
Lines changed: 4 additions & 0 deletions b/‎.rubocop.yml
Lines changed: 4 additions & 0 deletions
diff --git a/‎lib/octocatalog-diff/catalog-util/bootstrap.rb
Lines changed: 3 additions & 0 deletions b/‎lib/octocatalog-diff/catalog-util/bootstrap.rb
Lines changed: 3 additions & 0 deletions
diff --git a/‎lib/octocatalog-diff/catalog-util/fileresources.rb
Lines changed: 6 additions & 1 deletion b/‎lib/octocatalog-diff/catalog-util/fileresources.rb
Lines changed: 6 additions & 1 deletion
diff --git a/‎lib/octocatalog-diff/util/catalogs.rb
Lines changed: 19 additions & 5 deletions b/‎lib/octocatalog-diff/util/catalogs.rb
Lines changed: 19 additions & 5 deletions
diff --git a/‎lib/octocatalog-diff/util/parallel.rb
Lines changed: 138 additions & 83 deletions b/‎lib/octocatalog-diff/util/parallel.rb
Lines changed: 138 additions & 83 deletions
diff --git a/‎octocatalog-diff.gemspec
Lines changed: 0 additions & 1 deletion b/‎octocatalog-diff.gemspec
Lines changed: 0 additions & 1 deletion
@@ -52,6 +52,10 @@ Lint/EndAlignment:
 Style/FileName:
   Enabled: false
 
+# Sometimes it's cleaner without this
+Style/Documentation:
+  Enabled: false
+
 # To fix later
 Style/PercentLiteralDelimiters:
   Enabled: false
 
@@ -74,8 +74,11 @@ def self.bootstrap_directory_parallelizer(options, logger)
           if result.status
             logger.debug("Success bootstrap_directory for #{result.args[:tag]}")
           else
+            # Believed to be a bug condition, since error should have already been raised if this happens.
+            # :nocov:
             errmsg = "Failed bootstrap_directory for #{result.args[:tag]}: #{result.exception.class} #{result.exception.message}"
             raise OctocatalogDiff::Errors::BootstrapError, errmsg
+            # :nocov:
           end
         end
       end
 
@@ -86,7 +86,12 @@ def self._convert_file_resources(resources, compilation_dir, environment = 'prod
         resources.map! do |resource|
           if resource_convertible?(resource)
             path = file_path(resource['parameters']['source'], modulepaths)
-            raise Errno::ENOENT, "Unable to resolve '#{resource['parameters']['source']}'!" if path.nil?
+            if path.nil?
+              # Pass this through as a wrapped exception, because it's more likely to be something wrong
+              # in the catalog itself than it is to be a broken setup of octocatalog-diff.
+              message = "Errno::ENOENT: Unable to resolve '#{resource['parameters']['source']}'!"
+              raise OctocatalogDiff::Errors::CatalogError, message
+            end
 
             if File.file?(path)
               # If the file is found, read its content. If the content is all ASCII, substitute it into
 
@@ -99,6 +99,15 @@ def build_catalog_parallelizer
           # :nocov:
         end
 
+        # If catalogs failed to compile, report that. Prefer to display an actual failure message rather
+        # than a generic incomplete parallel task message if there is a more specific message present.
+        failures = parallel_catalogs.reject(&:status)
+        if failures.any?
+          f = failures.reject { |r| r.exception.is_a?(OctocatalogDiff::Util::Parallel::IncompleteTask) }.first
+          f ||= failures.first
+          raise f.exception
+        end
+
         # Construct result hash. Will eventually be in the format
         # { :from => OctocatalogDiff::Catalog, :to => OctocatalogDiff::Catalog }
 
@@ -203,10 +212,12 @@ def add_parallel_result(result, parallel_catalog_obj, key_task_tuple)
           end
         else
           # Something unhandled went wrong, and an exception was thrown. Reveal a generic message.
+          # :nocov:
           msg = parallel_catalog_obj.exception.message
           message = "Catalog for '#{key}' (#{branch}) failed to compile with #{parallel_catalog_obj.exception.class}: #{msg}"
           message += "\n" + parallel_catalog_obj.exception.backtrace.map { |x| "   #{x}" }.join("\n") if @options[:debug]
           raise OctocatalogDiff::Errors::CatalogError, message
+          # :nocov:
         end
       end
 
@@ -227,15 +238,18 @@ def build_catalog(opts, logger = @logger)
         catalog
       end
 
-      # Validate a catalog in the parallel execution
+      # The catalog validator method can indicate failure in one of two ways:
+      # - Raise an exception (this is preferred, since it gives a specific error message)
+      # - Return false (supported but discouraged, since it only surfaces a generic error)
       # @param catalog [OctocatalogDiff::Catalog] Catalog object
       # @param logger [Logger] Logger object (presently unused)
       # @param args [Hash] Additional arguments set specifically for validator
-      # @return [Boolean] true if catalog is valid, false otherwise
+      # @return [Boolean] true if the catalog is valid; an exception is raised otherwise
       def catalog_validator(catalog = nil, _logger = @logger, args = {})
-        return false unless catalog.is_a?(OctocatalogDiff::Catalog)
-        catalog.validate_references if args[:task] == :to
-        catalog.valid?
+        raise ArgumentError, "Expects a catalog, got #{catalog.class}" unless catalog.is_a?(OctocatalogDiff::Catalog)
+        raise OctocatalogDiff::Errors::CatalogError, "Catalog failed: #{catalog.error_message}" unless catalog.valid?
+        catalog.validate_references if args[:task] == :to # Raises exception for broken references
+        true
       end
     end
   end
 
@@ -1,18 +1,22 @@
 # frozen_string_literal: true
 
-# Helper to use the 'parallel' gem to perform tasks
+# A class to parallelize process execution.
+# This is a utility class to execute tasks in parallel, with our own forking implementation
+# that passes through logs and reliably handles errors. If parallel processing has been disabled,
+# this instead executes the tasks serially, but provides the same API as the parallel tasks.
 
-require 'parallel'
 require 'stringio'
 
 module OctocatalogDiff
   module Util
-    # This is a utility class to execute tasks in parallel, using the 'parallel' gem.
-    # If parallel processing has been disabled, this instead executes the tasks serially,
-    # but provides the same API as the parallel tasks.
     class Parallel
+      # This exception marks a task that didn't complete.
+      class IncompleteTask < RuntimeError; end
+
+      # --------------------------------------
       # This class represents a parallel task. It requires a method reference, which will be executed with
       # any supplied arguments. It can optionally take a text description and a validator function.
+      # --------------------------------------
       class Task
         attr_reader :description
         attr_accessor :args
@@ -35,10 +39,12 @@ def validate(result, logger = Logger.new(StringIO.new))
         end
       end
 
+      # --------------------------------------
       # This class represents the result from a parallel task. The status is set to true (success), false (error),
       # or nil (task was killed before it could complete). The exception (for failure) and output object (for success)
       # are readable attributes. The validity of the results, determined by executing the 'validate' method of the Task,
       # is available to be set and fetched.
+      # --------------------------------------
       class Result
         attr_reader :output, :args
         attr_accessor :status, :exception
@@ -51,121 +57,170 @@ def initialize(opts = {})
         end
       end
 
+      # --------------------------------------
+      # Static methods in the class
+      # --------------------------------------
+
       # Entry point for parallel processing. By default this will perform parallel processing,
       # but it will also accept an option to do serial processing instead.
       # @param task_array [Array<Parallel::Task>] Tasks to run
       # @param logger [Logger] Optional logger object
       # @param parallelized [Boolean] True for parallel processing, false for serial processing
+      # @param raise_exception [Boolean] True to raise exception immediately if one occurs; false to return exception in results
       # @return [Array<Parallel::Result>] Parallel results (same order as tasks)
-      def self.run_tasks(task_array, logger = nil, parallelized = true)
+      def self.run_tasks(task_array, logger = nil, parallelized = true, raise_exception = false)
         # Create a throwaway logger object if one is not given
         logger ||= Logger.new(StringIO.new)
 
-        # Validate input - we need an array. If the array is empty then return an empty array right away.
+        # Validate input - we need an array of OctocatalogDiff::Util::Parallel::Task. If the array is empty then
+        # return an empty array right away.
         raise ArgumentError, "run_tasks() argument must be array, not #{task_array.class}" unless task_array.is_a?(Array)
         return [] if task_array.empty?
 
-        # Make sure each element in the array is a OctocatalogDiff::Util::Parallel::Task
-        task_array.each do |x|
-          next if x.is_a?(OctocatalogDiff::Util::Parallel::Task)
-          raise ArgumentError, "Element #{x.inspect} must be a OctocatalogDiff::Util::Parallel::Task, not a #{x.class}"
+        invalid_inputs = task_array.reject { |task| task.is_a?(OctocatalogDiff::Util::Parallel::Task) }
+        if invalid_inputs.any?
+          ele = invalid_inputs.first
+          raise ArgumentError, "Element #{ele.inspect} must be a OctocatalogDiff::Util::Parallel::Task, not a #{ele.class}"
         end
 
-        # Actually do the processing - choose here between parallel and serial
-        parallelized ? run_tasks_parallel(task_array, logger) : run_tasks_serial(task_array, logger)
+        # Initialize the result array. For now all entries in the array indicate that the task was killed.
+        # Actual statuses will replace this initial status. If the initial status wasn't replaced, then indeed,
+        # the task was killed.
+        result = task_array.map { |x| Result.new(exception: IncompleteTask.new('Killed'), args: x.args) }
+        logger.debug "Initialized parallel task result array: size=#{result.size}"
+
+        # Execute as per the requested method (serial or parallel) and handle results.
+        exception = parallelized ? run_tasks_parallel(result, task_array, logger) : run_tasks_serial(result, task_array, logger)
+        raise exception if exception && raise_exception
+        result
       end
 
-      # Use the parallel gem to run each task in the task array. Under the hood this is forking a process for
-      # each task, and serializing/deserializing the arguments and the outputs.
+      # Utility method! Not intended to be called from outside this class.
+      # ---
+      # Use a forking strategy to run tasks in parallel. Each task in the array is forked in a child
+      # process, and when that task completes it writes its result (OctocatalogDiff::Util::Parallel::Result)
+      # into a serialized data file. Once children are forked this method waits for their return, deserializing
+      # the output from each data file and updating the `result` array with actual results.
+      # @param result [Array<OctocatalogDiff::Util::Parallel::Result>] Parallel task results
       # @param task_array [Array<OctocatalogDiff::Util::Parallel::Task>] Tasks to perform
       # @param logger [Logger] Logger
-      # @return [Array<OctocatalogDiff::Util::Parallel::Result>] Parallel task results
-      def self.run_tasks_parallel(task_array, logger)
-        # Create an empty array of results. The status is nil and the exception is pre-populated. If the code
-        # runs successfully and doesn't get killed, all of these default values will be overwritten. If the code
-        # gets killed before the task finishes, this exception will remain.
-        result = task_array.map do |x|
-          Result.new(exception: ::Parallel::Kill.new('Killed'), args: x.args)
+      # @return [Exception] First exception encountered by a child process; returns nil if no exceptions encountered.
+      def self.run_tasks_parallel(result, task_array, logger)
+        pidmap = {}
+        ipc_tempdir = Dir.mktmpdir
+
+        # Child process forking
+        task_array.each_with_index do |task, index|
+          # simplecov doesn't see this because it's forked
+          # :nocov:
+          this_pid = fork do
+            task_result = execute_task(task, logger)
+            File.open(File.join(ipc_tempdir, "#{Process.pid}.dat"), 'w') { |f| f.write Marshal.dump(task_result) }
+            Kernel.exit! 0 # Kernel.exit! avoids at_exit from parents being triggered by children exiting
+          end
+          # :nocov:
+
+          pidmap[this_pid] = { index: index, start_time: Time.now }
+          logger.debug "Launched pid=#{this_pid} for index=#{index}"
+          logger.reopen if logger.respond_to?(:reopen)
         end
-        logger.debug "Initialized parallel task result array: size=#{result.size}"
 
-        # Do parallel processing
-        ::Parallel.each(task_array,
-                        finish: lambda do |item, i, parallel_result|
-                          # Set the result array element to the result
-                          result[i] = parallel_result
-
-                          # Kill all other parallel tasks if this task failed by throwing an exception
-                          raise ::Parallel::Kill unless parallel_result.exception.nil?
-
-                          # Run the validator to determine if the result is in fact valid. The validator
-                          # returns true or false. If true, set the 'valid' attribute in the result. If
-                          # false, kill all other parallel tasks.
-                          if item.validate(parallel_result.output, logger)
-                            logger.debug("Success #{item.description}")
-                          else
-                            logger.warn("Failed #{item.description}")
-                            result[i].status = false
-                            raise ::Parallel::Kill
-                          end
-                        end) do |ele|
-          # simplecov does not detect that this code runs because it's forked, but this is
-          # tested extensively in the parallel_spec.rb spec file.
-          # :nocov:
+        # Waiting for children and handling results
+        while pidmap.any?
+          this_pid, exit_obj = Process.wait2(0)
+          next unless this_pid && pidmap.key?(this_pid)
+          index = pidmap[this_pid][:index]
+          exitstatus = exit_obj.exitstatus
+          raise "PID=#{this_pid} exited abnormally: #{exit_obj.inspect}" if exitstatus.nil?
+          raise "PID=#{this_pid} exited with status #{exitstatus}" unless exitstatus.zero?
+
+          input = File.read(File.join(ipc_tempdir, "#{this_pid}.dat"))
+          result[index] = Marshal.load(input) # rubocop:disable Security/MarshalLoad
+          time_delta = Time.now - pidmap[this_pid][:start_time]
+          pidmap.delete(this_pid)
+
+          logger.debug "PID=#{this_pid} completed in #{time_delta} seconds, #{input.length} bytes"
+
+          next if result[index].status
+          return result[index].exception
+        end
+
+        logger.debug 'All child processes completed with no exceptions raised'
+
+      # Cleanup: Kill any child processes that are still running, and clean the temporary directory
+      # where data files were stored.
+      ensure
+        pidmap.each do |pid, _pid_data|
           begin
-            logger.debug("Begin #{ele.description}")
-            output = ele.execute(logger)
-            logger.debug("Success #{ele.description}")
-            Result.new(output: output, status: true, args: ele.args)
-          rescue => exc
-            logger.debug("Failed #{ele.description}: #{exc.class} #{exc.message}")
-            Result.new(exception: exc, status: false, args: ele.args)
+            Process.kill('TERM', pid)
+          rescue Errno::ESRCH # rubocop:disable Lint/HandleExceptions
+            # If the process doesn't exist, that's fine.
           end
-          # :nocov:
         end
 
-        # Return result
-        result
+        retries = 0
+        while File.directory?(ipc_tempdir) && retries < 10
+          retries += 1
+          begin
+            FileUtils.remove_entry_secure ipc_tempdir
+          rescue Errno::ENOTEMPTY, Errno::ENOENT # rubocop:disable Lint/HandleExceptions
+            # Errno::ENOTEMPTY will trigger a retry because the directory exists
+            # Errno::ENOENT will break the loop because the directory won't exist next time it's checked
+          end
+        end
       end
 
+      # Utility method! Not intended to be called from outside this class.
+      # ---
       # Perform the tasks in serial.
+      # @param result [Array<OctocatalogDiff::Util::Parallel::Result>] Parallel task results
       # @param task_array [Array<OctocatalogDiff::Util::Parallel::Task>] Tasks to perform
       # @param logger [Logger] Logger
-      # @return [Array<OctocatalogDiff::Util::Parallel::Result>] Parallel task results
-      def self.run_tasks_serial(task_array, logger)
-        # Create an empty array of results. The status is nil and the exception is pre-populated. If the code
-        # runs successfully, all of these default values will be overwritten. If a predecessor task fails, all
-        # later task will have the defined exception.
-        result = task_array.map do |x|
-          Result.new(exception: ::RuntimeError.new('Cancellation - A prior task failed'), args: x.args)
-        end
-
+      def self.run_tasks_serial(result, task_array, logger)
         # Perform the tasks 1 by 1 - each successful task will replace an element in the 'result' array,
         # whereas a failed task will replace the current element with an exception, and all later tasks
         # will not be replaced (thereby being populated with the cancellation error).
-        task_counter = 0
-        task_array.each do |ele|
-          begin
-            logger.debug("Begin #{ele.description}")
-            output = ele.execute(logger)
-            result[task_counter] = Result.new(output: output, status: true, args: ele.args)
-          rescue => exc
-            logger.debug("Failed #{ele.description}: #{exc.class} #{exc.message}")
-            result[task_counter] = Result.new(exception: exc, status: false, args: ele.args)
-          end
+        task_array.each_with_index do |ele, task_counter|
+          result[task_counter] = execute_task(ele, logger)
+          next if result[task_counter].status
+          return result[task_counter].exception
+        end
+        nil
+      end
 
-          if ele.validate(output, logger)
-            logger.debug("Success #{ele.description}")
+      # Utility method! Not intended to be called from outside this class.
+      # ---
+      # Process a single task. Called by run_tasks_parallel / run_tasks_serial.
+      # This method will report all exceptions in the OctocatalogDiff::Util::Parallel::Result object
+      # itself, and not raise them.
+      # @param task [OctocatalogDiff::Util::Parallel::Task] Task object
+      # @param logger [Logger] Logger
+      # @return [OctocatalogDiff::Util::Parallel::Result] Parallel task result
+      def self.execute_task(task, logger)
+        begin
+          logger.debug("Begin #{task.description}")
+          output = task.execute(logger)
+          result = Result.new(output: output, status: true, args: task.args)
+        rescue => exc
+          logger.debug("Failed #{task.description}: #{exc.class} #{exc.message}")
+          # Immediately return without running the validation, since this already failed.
+          return Result.new(exception: exc, status: false, args: task.args)
+        end
+
+        begin
+          if task.validate(output, logger)
+            logger.debug("Success #{task.description}")
           else
-            logger.warn("Failed #{ele.description}")
-            result[task_counter].status = false
+            # Preferably the validator method raised its own exception. However if it
+            # simply returned false, raise our own exception here.
+            raise "Failed #{task.description} validation (unspecified error)"
           end
-
-          break unless result[task_counter].status
-          task_counter += 1
+        rescue => exc
+          logger.warn("Failed #{task.description} validation: #{exc.class} #{exc.message}")
+          result.status = false
+          result.exception = exc
         end
 
-        # Return the result
         result
       end
     end
 
@@ -26,7 +26,6 @@ EOF
   s.add_runtime_dependency 'diffy', '>= 3.1.0'
   s.add_runtime_dependency 'httparty', '>= 0.11.0'
   s.add_runtime_dependency 'hashdiff', '>= 0.3.0'
-  s.add_runtime_dependency 'parallel', '>= 1.11.1'
   s.add_runtime_dependency 'rugged', '>= 0.25.0b2'
 
   s.add_development_dependency 'rspec', '~> 3.4.0'