
Commit dfcb2f5

API upgrade. Tested (#49)
* Add API endpoint to create and trigger DAGs in the same call. Moved logic from clean_dag_run into cwl.py so it can be reused. Add --combine flag to test the new API endpoint. Possible risk: cleaning old dag runs can be too slow, because a new dag_run with the same dag_id and run_id can't be created until the previous one has been deleted.
* Add tests to Travis for the --combine parameter
* Add comments to functions
* Add progress field to the GET /dag_runs response
* Fix bug in progress calculation
* Refactor get_dag_runs to work faster
* Update docs
* Minor changes
1 parent aac4242 commit dfcb2f5
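
The progress field mentioned in the commit message is returned by GET /dag_runs for every dag_run. A minimal polling sketch, assuming the cwl-airflow API is reachable at http://127.0.0.1:8081 (the address, identifiers, and helper name below are placeholders, not part of this commit):

import time
import requests

API = "http://127.0.0.1:8081"  # assumed API address; adjust to your deployment

def wait_for_dag_run(dag_id, run_id, poll_interval=10):
    # Poll GET /dag_runs and report the 'progress' value (percent of succeeded tasks)
    # until the dag_run leaves the 'running' state.
    while True:
        r = requests.get(
            url=f"{API}/api/experimental/dag_runs",
            params={"dag_id": dag_id, "run_id": run_id}
        )
        r.raise_for_status()
        dag_runs = r.json()["dag_runs"]
        if not dag_runs:
            raise RuntimeError(f"No dag_run found for {dag_id} / {run_id}")
        dag_run = dag_runs[0]
        print(f"{dag_run['state']}: {dag_run['progress']}% of tasks succeeded")
        if dag_run["state"] != "running":
            return dag_run
        time.sleep(poll_interval)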

8 files changed (+327 / -183 lines)

.travis.yml

Lines changed: 5 additions & 1 deletion
@@ -66,6 +66,10 @@ jobs:
       tags: true
     - name: DAG with embedded workflow (just one test)
       script: cwl-airflow test --suite workflows/tests/conformance_tests.yaml --spin --range 1 --embed
+    - name: DAG with attached workflow using combined API call (just one test)
+      script: cwl-airflow test --suite workflows/tests/conformance_tests.yaml --spin --range 1 --combine
+    - name: DAG with embedded workflow using combined API call (just one test)
+      script: cwl-airflow test --suite workflows/tests/conformance_tests.yaml --spin --range 1 --embed --combine
     - name: Test of `init --upgrade`
       before_install:
         - mkdir -p ~/airflow/dags
@@ -115,7 +119,7 @@ script: cwl-airflow test --suite workflows/tests/conformance_tests.yaml --spin -
 branches:
   only:
     - master
-    - /*_devel/
+    - /^*_devel$/
     - /^([1-9]\d*!)?(0|[1-9]\d*)(\.(0|[1-9]\d*))*((a|b|rc)(0|[1-9]\d*))?(\.post(0|[1-9]\d*))?(\.dev(0|[1-9]\d*))?$/
 
 notifications:

cwl_airflow/components/api/backend.py

Lines changed: 34 additions & 36 deletions
@@ -33,6 +33,7 @@
     fast_cwl_load,
     slow_cwl_load,
     convert_to_workflow,
+    clean_up_dag_run,
     DAG_TEMPLATE
 )
 
@@ -75,7 +76,7 @@ def __init__(self):
 
 
     def get_dags(self, dag_ids=[]):
-        logging.debug(f"Call get_dags with dag_ids={dag_ids}")
+        logging.info(f"Call get_dags with dag_ids={dag_ids}")
         try:
             dag_ids = dag_ids or self.list_dags()
             logging.debug(f"Processing dags {dag_ids}")
@@ -85,41 +86,39 @@ def get_dags(self, dag_ids=[]):
             return {"dags": []}
 
 
-    def post_dag(self, dag_id=None):
-        logging.debug(f"Call post_dag with dag_id={dag_id}")
+    def post_dags(self, dag_id=None):
+        logging.info(f"Call post_dags with dag_id={dag_id}")
         try:
             res = self.export_dag(dag_id or ''.join(random.choice(string.ascii_lowercase) for i in range(32)))
             logging.debug(f"Exported DAG {res}")
             return res
         except Exception as err:
-            logging.error(f"Failed while running post_dag {err}")
+            logging.error(f"Failed while running post_dags {err}")
             return connexion.problem(500, "Failed to create dag", str(err))
 
 
     def get_dag_runs(self, dag_id=None, run_id=None, execution_date=None, state=None):
-        logging.debug(f"Call get_dag_runs with dag_id={dag_id}, run_id={run_id}, execution_date={execution_date}, state={state}")
+        logging.info(f"Call get_dag_runs with dag_id={dag_id}, run_id={run_id}, execution_date={execution_date}, state={state}")
         try:
             dag_runs = []
             dag_ids = [dag_id] if dag_id else self.list_dags()
-            logging.debug(f"Processing dags {dag_ids}")
+            logging.debug(f"Found dags {dag_ids}")
             for d_id in dag_ids:
-                logging.debug(f"Process dag {d_id}")
-                task_ids = self.list_tasks(d_id)
-                logging.debug(f"Fetched tasks {task_ids}")
-                for dag_run in self.list_dag_runs(d_id, state):
-                    logging.debug(f"Process dag run {dag_run['run_id']}, {dag_run['execution_date']}")
-                    if run_id and run_id != dag_run["run_id"] or execution_date and execution_date != dag_run["execution_date"]:
-                        logging.debug(f"Skip dag_run {dag_run['run_id']}, {dag_run['execution_date']} (run_id or execution_date doesn't match)")
+                logging.info(f"Process dag {d_id}")
+                for dag_run in DagRun.find(dag_id=d_id, state=state):
+                    logging.info(f"Process dag_run {dag_run.run_id}, {dag_run.execution_date.isoformat()}")
+                    if run_id and run_id != dag_run.run_id or execution_date and execution_date != dag_run.execution_date.isoformat():
+                        logging.info(f"Skip dag_run {dag_run.run_id}, {dag_run.execution_date.isoformat()} (run_id or execution_date doesn't match)")
                         continue
-                    response_item = {"dag_id": d_id,
-                                     "run_id": dag_run["run_id"],
-                                     "execution_date": dag_run["execution_date"],
-                                     "start_date": dag_run["start_date"],
-                                     "state": dag_run["state"],
-                                     "tasks": []}
-                    logging.debug(f"Get statuses for tasks {task_ids}")
-                    for t_id in task_ids:
-                        response_item["tasks"].append({"id": t_id, "state": self.task_state(d_id, t_id, dag_run["execution_date"])})
+                    response_item = {
+                        "dag_id": d_id,
+                        "run_id": dag_run.run_id,
+                        "execution_date": dag_run.execution_date.isoformat(),
+                        "start_date": dag_run.start_date.isoformat(),
+                        "state": dag_run.state,
+                        "tasks": [{"id": ti.task_id, "state": ti.state} for ti in dag_run.get_task_instances()],
+                        "progress": int(len([ti for ti in dag_run.get_task_instances(State.SUCCESS)]) / len(dag_run.get_task_instances()) * 100)
+                    }
                     dag_runs.append(response_item)
             return {"dag_runs": dag_runs}
         except Exception as err:
@@ -128,7 +127,7 @@ def get_dag_runs(self, dag_id=None, run_id=None, execution_date=None, state=None
 
 
     def post_dag_runs(self, dag_id, run_id=None, conf=None):
-        logging.debug(f"Call post_dag_runs with dag_id={dag_id}, run_id={run_id}, conf={conf}")
+        logging.info(f"Call post_dag_runs with dag_id={dag_id}, run_id={run_id}, conf={conf}")
         try:
             dagrun = self.trigger_dag(dag_id, run_id, conf)
             return {"dag_id": dagrun.dag_id,
@@ -141,9 +140,20 @@ def post_dag_runs(self, dag_id, run_id=None, conf=None):
             return connexion.problem(500, "Failed to create dag_run", str(err))
 
 
+    def post_dags_dag_runs(self, dag_id, run_id, conf=None):
+        logging.info(f"Call post_dags_dag_runs with dag_id={dag_id}, run_id={run_id}, conf={conf}")
+        self.post_dags(dag_id)
+        clean_up_dag_run(
+            dag_id=dag_id,
+            run_id=run_id,
+            kill_timeout=3  # use shorter timeout for killing running tasks
+        )  # should wait until it finishes running, as we can't trigger the same DAG with the same run_id
+        return self.post_dag_runs(dag_id, run_id, conf)
+
+
     def post_dag_runs_legacy(self, dag_id):
         data = connexion.request.json
-        logging.debug(f"Call post_dag_runs_legacy with dag_id={dag_id}, data={data}")
+        logging.info(f"Call post_dag_runs_legacy with dag_id={dag_id}, data={data}")
         return self.post_dag_runs(dag_id, data["run_id"], data["conf"])
 
 
@@ -223,18 +233,6 @@ def task_state(self, dag_id, task_id, execution_date):
         return task_state
 
 
-    def list_dag_runs(self, dag_id, state):
-        dag_runs = []
-        for dag_run in DagRun.find(dag_id=dag_id, state=state):
-            dag_runs.append({
-                "run_id": dag_run.run_id,
-                "state": dag_run.state,
-                "execution_date": dag_run.execution_date.isoformat(),
-                "start_date": ((dag_run.start_date or '') and dag_run.start_date.isoformat())
-            })
-        return dag_runs
-
-
     def save_attachment(self, attachment, location, exist_ok=False):
         if path.isfile(location) and not exist_ok:
             raise FileExistsError(f"File {location} already exist")
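
The refactored get_dag_runs above derives the new progress value directly from the run's task instances instead of querying each task state separately. A standalone sketch of that calculation (the helper name and the empty-run guard are additions for illustration, not part of the commit):

from airflow.models import DagRun
from airflow.utils.state import State

def dag_run_progress(dag_run: DagRun) -> int:
    # Percent of task instances that reached SUCCESS, mirroring the value
    # reported in the 'progress' field of the GET /dag_runs response.
    task_instances = dag_run.get_task_instances()
    if not task_instances:  # guard for runs without task instances (assumption, not in the diff)
        return 0
    succeeded = dag_run.get_task_instances(State.SUCCESS)
    return int(len(succeeded) / len(task_instances) * 100)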

cwl_airflow/components/api/openapi/swagger_configuration.yaml

Lines changed: 69 additions & 2 deletions
@@ -1,7 +1,7 @@
 swagger: "2.0"
 info:
   title: CWL-Airflow API
-  version: 1.0.0
+  version: 1.0.1
 basePath: "/api/experimental"
 schemes:
   - https
@@ -53,7 +53,7 @@ paths:
     post:
       summary: Creates new dag with dag_id from the attached workflow.cwl file or its compressed content.
       description: Creates new dag with dag_id from the attached workflow.cwl file or its compressed content.
-      operationId: post_dag
+      operationId: post_dags
      responses:
        "200":
          description: dag_id, py and cwl file locations of a created dag.
@@ -194,6 +194,70 @@ paths:
       tags:
         - Airflow
 
+  /dags/dag_runs:
+    post:
+      summary: Combined logic from /dags and /dag_runs POSTs
+      description: >-
+        1. Creates new dag with dag_id from the attached workflow.cwl file or its compressed content.
+        Either workflow or workflow_content should be provided.
+        dag_id should follow the naming rule "cwlid-commitsha", otherwise outdated dags won't be deleted.
+        2. Tries to delete all previous dag_runs for the provided dag_id and run_id, which also includes
+        - stopping all running tasks for the current dag_id and run_id
+        - removing corresponding temporary data
+        - cleaning up corresponding records in DB
+        - removing outdated dags for the same cwlid if no running dag_runs were found for them
+        3. Creates new dag_run for dag_id with run_id and optional conf
+      operationId: post_dags_dag_runs
+      responses:
+        "200":
+          description: Reference information about created dag and dag_run.
+          schema:
+            $ref: "#/definitions/PostDagRunsResponse"
+        "400":
+          description: The request is malformed.
+          schema:
+            $ref: "#/definitions/ErrorResponse"
+        "401":
+          description: The request is unauthorized.
+          schema:
+            $ref: "#/definitions/ErrorResponse"
+        "403":
+          description: The requester is not authorized to perform this action.
+          schema:
+            $ref: "#/definitions/ErrorResponse"
+        "500":
+          description: An unexpected error occurred.
+          schema:
+            $ref: "#/definitions/ErrorResponse"
+      parameters:
+        - name: dag_id
+          description: Dag identifier, follow the naming rule "cwlid-commitsha"
+          in: query
+          required: true
+          type: string
+        - name: run_id
+          description: Run identifier
+          in: query
+          required: true
+          type: string
+        - name: conf
+          description: Run configuration (JSON-formatted string)
+          in: query
+          required: false
+          type: string
+        - name: workflow
+          description: CWL workflow file with embedded tools and all other dependencies
+          in: formData
+          required: false
+          type: file
+        - name: workflow_content
+          description: base64 encoded zlib compressed workflow content
+          in: formData
+          required: false
+          type: string
+      tags:
+        - Airflow
+
   # delete this path if not actively used
   /dags/{dag_id}/dag_runs:
     post:
@@ -610,6 +674,7 @@ definitions:
         - start_date
        - state
        - tasks
+        - progress
      properties:
        dag_id:
          type: string
@@ -632,6 +697,8 @@
            type: string
          state:
            $ref: "#/definitions/TaskState"
+        progress:
+          type: integer
      description: Dag run info
 
   DagRunState:
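
Based on the /dags/dag_runs path defined above, a single request can create the DAG, clean up previous runs, and trigger a new dag_run. A minimal client sketch, assuming the API is served at http://127.0.0.1:8081 and a packed workflow file exists locally (all identifiers and paths below are placeholders):

import json
import requests

API = "http://127.0.0.1:8081"  # assumed API address

with open("workflow_packed.cwl", "rb") as workflow_stream:  # placeholder workflow with embedded tools
    r = requests.post(
        url=f"{API}/api/experimental/dags/dag_runs",
        params={
            "dag_id": "my-workflow-0a1b2c3",  # should follow the "cwlid-commitsha" naming rule
            "run_id": "run-001",
            "conf": json.dumps({"job": {"input_file": "/tmp/data.txt"}})
        },
        files={"workflow": workflow_stream}  # alternatively, send json={"workflow_content": ...}
    )
r.raise_for_status()
print(r.json())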

cwl_airflow/components/test/conformance.py

Lines changed: 52 additions & 23 deletions
@@ -199,6 +199,7 @@ def create_dags(suite_data, args, dags_folder=None):
     # TODO: Do we need to force scheduler to reload DAGs after all DAG added?
 
     for test_data in suite_data.values():
+        params = {"dag_id": test_data["dag_id"]}
         workflow_path = os.path.join(
             args.tmp,
             os.path.basename(test_data["tool"])
@@ -214,18 +215,14 @@ def create_dags(suite_data, args, dags_folder=None):
                 logging.info(f"Sending base64 encoded zlib compressed content from {workflow_path}")
                 r = requests.post(
                     url=urljoin(args.api, "/api/experimental/dags"),
-                    params={
-                        "dag_id": test_data["dag_id"]
-                    },
+                    params=params,
                     json={"workflow_content": get_compressed(input_stream)}
                 )
             else:  # attach workflow as a file
                 logging.info(f"Attaching workflow file {workflow_path}")
                 r = requests.post(
                     url=urljoin(args.api, "/api/experimental/dags"),
-                    params={
-                        "dag_id": test_data["dag_id"]
-                    },
+                    params=params,
                     files={"workflow": input_stream}
                 )
 
@@ -241,23 +238,52 @@ def create_dags(suite_data, args, dags_folder=None):
 def trigger_dags(suite_data, args):
     """
     Triggers all DAGs from "suite_data". If failed to trigger DAG, updates
-    "suite_data" with "error" and sets "finished" to True
+    "suite_data" with "error" and sets "finished" to True. In case --combine
+    was set, we will call API that will first create the new DAG, then clean
+    all previous DAG runs based on the provided run_id and dag_id, then remove
+    outdated DAGs for the same workflow (for that dag_id should follow naming
+    rule cwlid-commitsha) and only after that trigger the workflow execution.
+    If not only --combine but also --embed was provided, send base64 encoded
+    zlib compressed content of the workflow file instead of attaching it.
     """
 
     for run_id, test_data in suite_data.items():
-        logging.info(f"Trigger DAG {test_data['dag_id']} from test case {test_data['index']} as {run_id}")
-        r = requests.post(
-            url=urljoin(args.api, "/api/experimental/dag_runs"),
-            params={
-                "run_id": run_id,
-                "dag_id": test_data["dag_id"],
-                "conf": json.dumps(
-                    {
-                        "job": test_data["job"]
-                    }
-                )
-            }
-        )
+        params = {
+            "run_id": run_id,
+            "dag_id": test_data["dag_id"],
+            "conf": json.dumps({"job": test_data["job"]})
+        }
+        if args.combine:  # use API endpoint that combines creating, cleaning and triggering new DAGs
+            logging.info(f"Add and trigger DAG {test_data['dag_id']} from test case {test_data['index']} as {run_id}")
+            workflow_path = os.path.join(
+                args.tmp,
+                os.path.basename(test_data["tool"])
+            )
+            embed_all_runs(  # will save results to "workflow_path"
+                workflow_tool=fast_cwl_load(test_data["tool"]),
+                location=workflow_path
+            )
+            with open(workflow_path, "rb") as input_stream:
+                if args.embed:  # send base64 encoded zlib compressed workflow content that will be embedded into DAG python file
+                    logging.info(f"Sending base64 encoded zlib compressed content from {workflow_path}")
+                    r = requests.post(
+                        url=urljoin(args.api, "/api/experimental/dags/dag_runs"),
+                        params=params,
+                        json={"workflow_content": get_compressed(input_stream)}
+                    )
+                else:  # attach workflow as a file
+                    logging.info(f"Attaching workflow file {workflow_path}")
+                    r = requests.post(
+                        url=urljoin(args.api, "/api/experimental/dags/dag_runs"),
+                        params=params,
+                        files={"workflow": input_stream}
+                    )
+        else:
+            logging.info(f"Trigger DAG {test_data['dag_id']} from test case {test_data['index']} as {run_id}")
+            r = requests.post(
+                url=urljoin(args.api, "/api/experimental/dag_runs"),
+                params=params
+            )
         if not r.ok:
             reason = get_api_failure_reason(r)
             logging.error(f"Failed to trigger DAG {test_data['dag_id']} from test case {test_data['index']} as {run_id} due to {reason}")
@@ -287,8 +313,10 @@ def run_test_conformance(args):
     suite_data = load_test_suite(args)
     results_queue = queue.Queue(maxsize=len(suite_data))
 
-    # Create new dags
-    create_dags(suite_data, args) # only reads from "suite_data"
+    # Create new DAGs if --combine wasn't set and we want to use two
+    # separate API calls for creating and triggering DAGs
+    if not args.combine:
+        create_dags(suite_data, args) # only reads from "suite_data"
 
     # Start thread to listen for status updates before
     # we trigger DAGs. "results_queue" is thread safe
@@ -301,7 +329,8 @@ def run_test_conformance(args):
 
     # Trigger all dags updating "suite_data" items with "error" and "finished"=True
     # for all DAG runs that we failed to trigger. Writing to "suite_data" is not
-    # thread safe!
+    # thread safe! If --combine was set, this function will also create new DAGs
+    # and clean old DAG runs
     trigger_dags(suite_data, args)
 
     # Start checker thread to evaluate received results.
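
For comparison with the --combine branch above, the pre-existing two-call flow used when --combine is not set first creates the DAG and then triggers it separately. A rough sketch of those two requests outside the test harness (addresses, paths, and identifiers are placeholders):

import json
import requests

API = "http://127.0.0.1:8081"   # assumed API address
dag_id = "my-workflow-0a1b2c3"  # placeholder identifiers
run_id = "run-001"

# 1. Create the DAG from an attached workflow file (POST /dags)
with open("workflow_packed.cwl", "rb") as workflow_stream:
    requests.post(
        url=f"{API}/api/experimental/dags",
        params={"dag_id": dag_id},
        files={"workflow": workflow_stream}
    ).raise_for_status()

# 2. Trigger it with a separate call (POST /dag_runs)
requests.post(
    url=f"{API}/api/experimental/dag_runs",
    params={
        "dag_id": dag_id,
        "run_id": run_id,
        "conf": json.dumps({"job": {}})
    }
).raise_for_status()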
