Commit 9a8e654

Merge pull request #800 from common-workflow-language/multiprocess
Multiprocess executor with resource accounting
2 parents 977962e + 7ad7834 commit 9a8e654

File tree

11 files changed: +174 additions, -93 deletions


cwltool/argparser.py

Lines changed: 1 addition & 3 deletions
@@ -22,9 +22,7 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         help="Output directory, default current directory")
 
     parser.add_argument("--parallel", action="store_true", default=False,
-                        help="[experimental] Run jobs in parallel. "
-                             "Does not currently keep track of ResourceRequirements like the number of cores"
-                             "or memory and can overload this system")
+                        help="[experimental] Run jobs in parallel. ")
     envgroup = parser.add_mutually_exclusive_group()
     envgroup.add_argument("--preserve-environment", type=Text, action="append",
                           help="Preserve specific environment variable when "

cwltool/builder.py

Lines changed: 2 additions & 2 deletions
@@ -113,7 +113,7 @@ def __init__(self,
                 hints=None,             # type: List[Dict[Text, Any]]
                 timeout=None,           # type: float
                 debug=False,            # type: bool
-                resources=None,         # type: Dict[Text, int]
+                resources=None,         # type: Dict[str, int]
                 js_console=False,       # type: bool
                 mutation_manager=None,  # type: Optional[MutationManager]
                 formatgraph=None,       # type: Optional[Graph]
@@ -154,7 +154,7 @@ def __init__(self,
         self.tmpdir = tmpdir
 
         if resources is None:
-            self.resources = {}  # type: Dict[Text, int]
+            self.resources = {}  # type: Dict[str, int]
         else:
             self.resources = resources
 
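
For context, builder.resources is the dict these annotations describe: the evaluated ResourceRequirement values keyed by the CWL runtime fields. A purely illustrative example (key names assumed from the CWL spec; values made up):

    # Illustrative only -- actual values come from ResourceRequirement
    # and the executor's select_resources hook.
    resources = {
        "cores": 1,         # CPU cores granted to the job
        "ram": 1024,        # RAM in mebibytes (feeds docker --memory below)
        "outdirSize": 1024,
        "tmpdirSize": 1024,
    }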

cwltool/context.py

Lines changed: 6 additions & 1 deletion
@@ -1,4 +1,6 @@
 import copy
+import threading  # pylint: disable=unused-import
+
 from .utils import DEFAULT_TMP_PREFIX
 from .stdfsaccess import StdFsAccess
 from typing import (Any, Callable, Dict,  # pylint: disable=unused-import
@@ -68,6 +70,8 @@ def copy(self):
 class RuntimeContext(ContextBase):
     def __init__(self, kwargs=None):
         # type: (Optional[Dict[str, Any]]) -> None
+        select_resources_callable = Callable[  # pylint: disable=unused-variable
+            [Dict[str, int], RuntimeContext], Dict[str, int]]
         self.user_space_docker_cmd = ""  # type: Text
         self.secret_store = None  # type: Optional[SecretStore]
         self.no_read_only = False  # type: bool
@@ -107,7 +111,7 @@ def __init__(self, kwargs=None):
         self.docker_stagedir = ""  # type: Text
         self.js_console = False  # type: bool
         self.job_script_provider = None  # type: Optional[DependenciesConfiguration]
-        self.select_resources = None  # type: Optional[Callable[[Dict[Text, int]], Dict[Text, int]]]
+        self.select_resources = None  # type: Optional[select_resources_callable]
         self.eval_timeout = 20  # type: float
         self.postScatterEval = None  # type: Optional[Callable[[Dict[Text, Any]], Dict[Text, Any]]]
         self.on_error = "stop"  # type: Text
@@ -116,6 +120,7 @@ def __init__(self, kwargs=None):
         self.cidfile_dir = None
         self.cidfile_prefix = None
 
+        self.workflow_eval_lock = None  # type: Optional[threading.Condition]
         self.research_obj = None  # type: Optional[ResearchObject]
         self.orcid = None
         self.cwl_full_name = None
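
The new select_resources_callable alias documents the hook's shape: it receives the requested min/max values plus the RuntimeContext and returns the granted "cores" and "ram"; the new workflow_eval_lock is the threading.Condition the executors below wait on and notify. A minimal sketch of plugging in a custom policy (the policy itself is hypothetical, not part of this commit):

    from cwltool.context import RuntimeContext

    def grant_minimum(request, runtime_context):
        # Hypothetical policy: always grant the smallest allowed footprint.
        return {"cores": request["coresMin"], "ram": request["ramMin"]}

    runtime_context = RuntimeContext()
    runtime_context.select_resources = grant_minimum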

cwltool/docker.py

Lines changed: 2 additions & 0 deletions
@@ -339,4 +339,6 @@ def create_runtime(self, env, runtimeContext):
         for t, v in self.environment.items():
             runtime.append(u"--env=%s=%s" % (t, v))
 
+        runtime.append("--memory=%dm" % self.builder.resources["ram"])
+
         return runtime
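
The appended option caps the container at the RAM granted to the job; resources["ram"] is in mebibytes, matching docker's "m" suffix. A standalone sketch of the string being built (values illustrative):

    # With resources["ram"] == 1024 the generated argument is "--memory=1024m".
    resources = {"ram": 1024}
    memory_flag = "--memory=%dm" % resources["ram"]
    assert memory_flag == "--memory=1024m"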

cwltool/executors.py

Lines changed: 140 additions & 64 deletions
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+""" Single and multi-threaded executors."""
 import os
 import tempfile
 import threading
@@ -10,6 +12,8 @@
 import six
 from six import string_types
 
+import psutil
+
 from .builder import Builder  # pylint: disable=unused-import
 from .errors import WorkflowException
 from .loghandler import _logger
@@ -44,32 +48,33 @@ def run_jobs(self,
                  process,           # type: Process
                  job_order_object,  # type: Dict[Text, Any]
                  logger,
-                 runtimeContext     # type: RuntimeContext
+                 runtime_context    # type: RuntimeContext
                 ):  # type: (...) -> None
         """ Execute the jobs for the given Process. """
         pass
 
     def execute(self,
                 process,           # type: Process
                 job_order_object,  # type: Dict[Text, Any]
-                runtimeContext,    # type: RuntimeContext
+                runtime_context,   # type: RuntimeContext
                 logger=_logger,
                ):  # type: (...) -> Tuple[Optional[Dict[Text, Any]], Text]
         """ Execute the process. """
 
-        if not runtimeContext.basedir:
+        if not runtime_context.basedir:
             raise WorkflowException("Must provide 'basedir' in runtimeContext")
 
         finaloutdir = None  # Type: Optional[Text]
-        original_outdir = runtimeContext.outdir
+        original_outdir = runtime_context.outdir
         if isinstance(original_outdir, string_types):
             finaloutdir = os.path.abspath(original_outdir)
-        runtimeContext = runtimeContext.copy()
-        runtimeContext.outdir = tempfile.mkdtemp(
-            prefix=getdefault(runtimeContext.tmp_outdir_prefix, DEFAULT_TMP_PREFIX))
-        self.output_dirs.add(runtimeContext.outdir)
-        runtimeContext.mutation_manager = MutationManager()
-        runtimeContext.toplevel = True
+        runtime_context = runtime_context.copy()
+        runtime_context.outdir = tempfile.mkdtemp(
+            prefix=getdefault(runtime_context.tmp_outdir_prefix, DEFAULT_TMP_PREFIX))
+        self.output_dirs.add(runtime_context.outdir)
+        runtime_context.mutation_manager = MutationManager()
+        runtime_context.toplevel = True
+        runtime_context.workflow_eval_lock = threading.Condition(threading.RLock())
 
         job_reqs = None
         if "cwl:requirements" in job_order_object:
@@ -81,20 +86,20 @@ def execute(self,
             for req in job_reqs:
                 process.requirements.append(req)
 
-        self.run_jobs(process, job_order_object, logger, runtimeContext)
+        self.run_jobs(process, job_order_object, logger, runtime_context)
 
         if self.final_output and self.final_output[0] and finaloutdir:
             self.final_output[0] = relocateOutputs(
                 self.final_output[0], finaloutdir, self.output_dirs,
-                runtimeContext.move_outputs, runtimeContext.make_fs_access(""),
-                getdefault(runtimeContext.compute_checksum, True))
+                runtime_context.move_outputs, runtime_context.make_fs_access(""),
+                getdefault(runtime_context.compute_checksum, True))
 
-        if runtimeContext.rm_tmpdir:
+        if runtime_context.rm_tmpdir:
             cleanIntermediate(self.output_dirs)
 
         if self.final_output and self.final_status:
 
-            if runtimeContext.research_obj is not None and \
+            if runtime_context.research_obj is not None and \
                     isinstance(process, (JobBase, Process, WorkflowJobStep,
                                          WorkflowJob)) and process.parent_wf:
                 process_run_id = None
@@ -115,45 +120,46 @@ def run_jobs(self,
                  process,           # type: Process
                  job_order_object,  # type: Dict[Text, Any]
                  logger,
-                 runtimeContext     # type: RuntimeContext
+                 runtime_context    # type: RuntimeContext
                 ):  # type: (...) -> None
 
         process_run_id = None  # type: Optional[str]
         reference_locations = {}  # type: Dict[Text,Text]
 
         # define provenance profile for single commandline tool
         if not isinstance(process, Workflow) \
-                and runtimeContext.research_obj is not None:
-            orcid = runtimeContext.orcid
-            full_name = runtimeContext.cwl_full_name
+                and runtime_context.research_obj is not None:
+            orcid = runtime_context.orcid
+            full_name = runtime_context.cwl_full_name
             process.provenance_object = CreateProvProfile(
-                runtimeContext.research_obj, orcid, full_name)
+                runtime_context.research_obj, orcid, full_name)
             process.parent_wf = process.provenance_object
-        jobiter = process.job(job_order_object, self.output_callback, runtimeContext)
+        jobiter = process.job(job_order_object, self.output_callback,
+                              runtime_context)
 
         try:
             for job in jobiter:
                 if job:
-                    if runtimeContext.builder is not None:
-                        job.builder = runtimeContext.builder
+                    if runtime_context.builder is not None:
+                        job.builder = runtime_context.builder
                     if job.outdir:
                         self.output_dirs.add(job.outdir)
-                    if runtimeContext.research_obj is not None:
+                    if runtime_context.research_obj is not None:
                         if not isinstance(process, Workflow):
-                            runtimeContext.prov_obj = process.provenance_object
+                            runtime_context.prov_obj = process.provenance_object
                         else:
-                            runtimeContext.prov_obj = job.prov_obj
-                        assert runtimeContext.prov_obj
+                            runtime_context.prov_obj = job.prov_obj
+                        assert runtime_context.prov_obj
                         process_run_id, reference_locations = \
-                            runtimeContext.prov_obj.evaluate(
-                                process, job, job_order_object,
-                                runtimeContext.make_fs_access,
-                                runtimeContext)
-                        runtimeContext = runtimeContext.copy()
-                        runtimeContext.process_run_id = process_run_id
-                        runtimeContext.reference_locations = \
+                            runtime_context.prov_obj.evaluate(
+                                process, job, job_order_object,
+                                runtime_context.make_fs_access,
+                                runtime_context)
+                        runtime_context = runtime_context.copy()
+                        runtime_context.process_run_id = process_run_id
+                        runtime_context.reference_locations = \
                             reference_locations
-                    job.run(runtimeContext)
+                    job.run(runtime_context)
                 else:
                     logger.error("Workflow cannot make any more progress.")
                     break
@@ -168,60 +174,130 @@ class MultithreadedJobExecutor(JobExecutor):
     """
     Experimental multi-threaded CWL executor.
 
-    Can easily overload a system as it does not do resource accounting.
+    Does simple resource accounting, will not start a job unless it
+    has cores / ram available, but does not make any attempt to
+    optimize usage.
     """
+
     def __init__(self):  # type: () -> None
         super(MultithreadedJobExecutor, self).__init__()
         self.threads = set()  # type: Set[threading.Thread]
         self.exceptions = []  # type: List[WorkflowException]
+        self.pending_jobs = []  # type: List[JobBase]
+        self.pending_jobs_lock = threading.Lock()
+
+        self.max_ram = psutil.virtual_memory().available / 2**20
+        self.max_cores = psutil.cpu_count()
+        self.allocated_ram = 0
+        self.allocated_cores = 0
+
+    def select_resources(self, request, runtime_context):  # pylint: disable=unused-argument
+        # type: (Dict[str, int], RuntimeContext) -> Dict[str, int]
+        """ Naïve check for available cpu cores and memory. """
+        result = {}  # type: Dict[str, int]
+        maxrsc = {
+            "cores": self.max_cores,
+            "ram": self.max_ram
+        }
+        for rsc in ("cores", "ram"):
+            if request[rsc+"Min"] > maxrsc[rsc]:
+                raise WorkflowException(
+                    "Requested at least %d %s but only %d available" %
+                    (request[rsc+"Min"], rsc, maxrsc[rsc]))
+            if request[rsc+"Max"] < maxrsc[rsc]:
+                result[rsc] = request[rsc+"Max"]
+            else:
+                result[rsc] = maxrsc[rsc]
+
+        return result
 
     def run_job(self,
-                job,             # type: JobBase
-                runtimeContext   # type: RuntimeContext
+                job,              # type: JobBase
+                runtime_context   # type: RuntimeContext
                ):  # type: (...) -> None
         """ Execute a single Job in a seperate thread. """
-        def runner():
-            """ Job running thread. """
-            try:
-                job.run(runtimeContext)
-            except WorkflowException as err:
-                self.exceptions.append(err)
-            except Exception as err:
-                self.exceptions.append(WorkflowException(Text(err)))
-            self.threads.remove(thread)
-
-        thread = threading.Thread(target=runner)
-        thread.daemon = True
-        self.threads.add(thread)
-        thread.start()
-
-    def wait_for_next_completion(self):  # type: () -> None
-        """ Check for exceptions while waiting for the jobs to finish. """
+
+        if job is not None:
+            with self.pending_jobs_lock:
+                self.pending_jobs.append(job)
+
+        while self.pending_jobs:
+            with self.pending_jobs_lock:
+                job = self.pending_jobs[0]
+                if isinstance(job, JobBase):
+                    if ((self.allocated_ram + job.builder.resources["ram"])
+                            > self.max_ram or
+                            (self.allocated_cores + job.builder.resources["cores"])
+                            > self.max_cores):
+                        return
+                self.pending_jobs.remove(job)
+
+            def runner(my_job, my_runtime_context):
+                """ Job running thread. """
+                try:
+                    my_job.run(my_runtime_context)
+                except WorkflowException as err:
+                    _logger.exception("Got workflow error")
+                    self.exceptions.append(err)
+                except Exception as err:  # pylint: disable=broad-except
+                    _logger.exception("Got workflow error")
+                    self.exceptions.append(WorkflowException(Text(err)))
+                finally:
+                    with my_runtime_context.workflow_eval_lock:
+                        self.threads.remove(threading.current_thread())
+                        if isinstance(my_job, JobBase):
+                            self.allocated_ram -= my_job.builder.resources["ram"]
+                            self.allocated_cores -= my_job.builder.resources["cores"]
+                        my_runtime_context.workflow_eval_lock.notifyAll()
+
+            thread = threading.Thread(
+                target=runner, args=(job, runtime_context))
+            thread.daemon = True
+            self.threads.add(thread)
+            if isinstance(job, JobBase):
+                self.allocated_ram += job.builder.resources["ram"]
+                self.allocated_cores += job.builder.resources["cores"]
+            thread.start()
+
+
+    def wait_for_next_completion(self, runtimeContext):  # type: (RuntimeContext) -> None
+        """ Wait for jobs to finish. """
+        if runtimeContext.workflow_eval_lock is not None:
+            runtimeContext.workflow_eval_lock.wait()
         if self.exceptions:
             raise self.exceptions[0]
 
     def run_jobs(self,
                  process,           # type: Process
                  job_order_object,  # type: Dict[Text, Any]
                  logger,
-                 runtimeContext     # type: RuntimeContext
+                 runtime_context    # type: RuntimeContext
                 ):  # type: (...) -> None
 
-        jobiter = process.job(job_order_object, self.output_callback, runtimeContext)
+        jobiter = process.job(job_order_object, self.output_callback,
+                              runtime_context)
 
+        if runtime_context.workflow_eval_lock is None:
+            raise WorkflowException(
+                "runtimeContext.workflow_eval_lock must not be None")
+
+        runtime_context.workflow_eval_lock.acquire()
         for job in jobiter:
-            if job:
-                if runtimeContext.builder is not None:
-                    job.builder = runtimeContext.builder
+            if job is not None:
+                if runtime_context.builder is not None:
+                    job.builder = runtime_context.builder
                 if job.outdir:
                     self.output_dirs.add(job.outdir)
-                self.run_job(job, runtimeContext)
-            else:
+
+            self.run_job(job, runtime_context)
+
+            if job is None:
                 if self.threads:
-                    self.wait_for_next_completion()
+                    self.wait_for_next_completion(runtime_context)
                 else:
                     logger.error("Workflow cannot make any more progress.")
                     break
 
         while self.threads:
-            self.wait_for_next_completion()
+            self.wait_for_next_completion(runtime_context)
+        runtime_context.workflow_eval_lock.release()
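
To make the accounting easier to follow outside the diff, here is a standalone sketch of the same clamp-and-check pattern used by select_resources above (the free function is hypothetical; the psutil calls mirror the ones in __init__):

    import psutil

    def clamp_request(request, max_cores=None, max_ram=None):
        # Hypothetical standalone version of the logic in select_resources:
        # refuse requests whose minimum exceeds the machine, otherwise grant
        # the requested maximum capped at what is available.
        if max_cores is None:
            max_cores = psutil.cpu_count()
        if max_ram is None:
            max_ram = psutil.virtual_memory().available // 2**20  # MiB
        limits = {"cores": max_cores, "ram": max_ram}
        result = {}
        for rsc in ("cores", "ram"):
            if request[rsc + "Min"] > limits[rsc]:
                raise RuntimeError("Requested at least %d %s but only %d available"
                                   % (request[rsc + "Min"], rsc, limits[rsc]))
            result[rsc] = min(request[rsc + "Max"], limits[rsc])
        return result

    # e.g. on an 8-core machine with ample RAM:
    # clamp_request({"coresMin": 1, "coresMax": 4, "ramMin": 256, "ramMax": 4096})
    # -> {"cores": 4, "ram": 4096}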

cwltool/expression.py

Lines changed: 2 additions & 2 deletions
@@ -253,7 +253,7 @@ def do_eval(ex,  # type: Union[Text, Dict]
             requirements,             # type: List[Dict[Text, Any]]
             outdir,                   # type: Optional[Text]
             tmpdir,                   # type: Optional[Text]
-            resources,                # type: Dict[Text, int]
+            resources,                # type: Dict[str, int]
             context=None,             # type: Any
             timeout=None,             # type: float
             force_docker_pull=False,  # type: bool
@@ -262,7 +262,7 @@ def do_eval(ex,  # type: Union[Text, Dict]
             strip_whitespace=True     # type: bool
            ):  # type: (...) -> Any
 
-    runtime = copy.copy(resources)  # type: Dict[Text, Any]
+    runtime = copy.copy(resources)  # type: Dict[str, Any]
     runtime["tmpdir"] = docker_windows_path_adjust(tmpdir)
     runtime["outdir"] = docker_windows_path_adjust(outdir)
 
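
The runtime dict assembled here is what CWL expressions see as runtime (for example $(runtime.ram) or $(runtime.outdir)). A minimal sketch of the same construction with illustrative values (the paths are hypothetical; docker_windows_path_adjust only rewrites them on Windows):

    import copy

    resources = {"cores": 2, "ram": 2048}   # shaped like builder.resources
    runtime = copy.copy(resources)
    runtime["tmpdir"] = "/tmp/tmpabc123"    # hypothetical paths
    runtime["outdir"] = "/out/step1"
    # In a CWL expression, $(runtime.ram) now evaluates to 2048 for this job.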
