Skip to content

Commit 813e988

Browse files
vjaganat90 and Vasu Jaganath authored
Fix whitespace in basepath and workflow_name and outdir location issue when using run_cwl_workflow (PolusAI#334)
* The workflow name derived from a file path needs flattening; this solves the weird "file not found" error.
* Resolve the basepath and workflow_name whitespace issues and place outdir correctly.
* Remove workflow_name from outdir_toil when using run_cwl_workflow to avoid long names.

---------

Co-authored-by: Vasu Jaganath <[email protected]>
1 parent 5f052c3 commit 813e988

File tree

2 files changed

+19
-8
lines changed

2 files changed

+19
-8
lines changed

src/sophios/api/pythonapi.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# pylint: disable=W1203
22
"""CLT utilities."""
33
import logging
4-
from pathlib import Path
4+
import subprocess as sub
5+
from pathlib import Path, PurePath
56
from typing import Any, ClassVar, Optional, TypeVar, Union, Dict, List
67

78
import cwl_utils.parser as cu_parser
@@ -503,6 +504,10 @@ class Workflow(BaseModel):
503504
# TypeError: 'ModelPrivateAttr' object is not iterable
504505

505506
def __init__(self, steps: list, workflow_name: str):
507+
workflow_name = workflow_name.lstrip('/').lstrip(' ')
508+
parts = PurePath(workflow_name).parts
509+
workflow_name = ('_'.join(part for part in parts if part)).lstrip("_")
510+
workflow_name = workflow_name.replace(' ', '_')
506511
data = {
507512
"process_name": workflow_name,
508513
"steps": steps
@@ -796,6 +801,8 @@ def get_cwl_workflow(self, args_dict: Dict[str, str] = {}) -> Json:
796801
"yaml_inputs": yaml_inputs,
797802
**cwl_ast
798803
}
804+
# delete the skeletal and ephemeral autogenerated folder
805+
_ = sub.run(['rm', '-rf', 'autogenerated'], shell=True, check=False, executable="/bin/bash")
799806
return workflow_json
800807

801808
def run(self, compile_args_dict: Dict[str, str] = {}, run_args_dict: Dict[str, str] = {}) -> None:

src/sophios/run_local.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ def run_local(args: argparse.Namespace, rose_tree: RoseTree, cachedir: Optional[
304304
return retval
305305

306306

307-
def copy_output_files(yaml_stem: str) -> None:
307+
def copy_output_files(yaml_stem: str, basepath: str = '') -> None:
308308
"""Copies output files from the cachedir to outdir/
309309
310310
Args:
@@ -338,7 +338,11 @@ def copy_output_files(yaml_stem: str) -> None:
338338
# except do it BEFORE the extension.
339339
# This could still cause problems with slicing, i.e. if you scatter across
340340
# indices 11-20 first, then 1-10 second, the output file indices will get switched.
341-
dest = 'outdir/' + parentdirs + '/' + basename
341+
dest = ''
342+
if basepath:
343+
dest = basepath + '/' + 'outdir/' + parentdirs + '/' + basename
344+
else:
345+
dest = 'outdir/' + parentdirs + '/' + basename
342346
if dest in dests:
343347
idx = 2
344348
while Path(dest).exists():
@@ -364,7 +368,7 @@ def build_cmd(workflow_name: str, basepath: str, cwl_runner: str, container_cmd:
364368
"""
365369
quiet = ['--quiet']
366370
skip_schemas = ['--skip-schemas']
367-
provenance = ['--provenance', f'provenance/{workflow_name}']
371+
provenance = ['--provenance', f'{basepath}/provenance/{workflow_name}']
368372
container_cmd_: List[str] = []
369373
if container_cmd == 'docker':
370374
container_cmd_ = []
@@ -390,8 +394,8 @@ def build_cmd(workflow_name: str, basepath: str, cwl_runner: str, container_cmd:
390394
now = datetime.now()
391395
date_time = now.strftime("%Y%m%d%H%M%S")
392396
cmd = [script] + container_pull + provenance + container_cmd_ + path_check
393-
cmd += ['--outdir', f'outdir_toil_{workflow_name}_{date_time}',
394-
'--jobStore', f'file:./jobStore_{workflow_name}', # NOTE: This is the equivalent of --cachedir
397+
cmd += ['--outdir', f'{basepath}/outdir_toil_{date_time}',
398+
'--jobStore', f'file:{basepath}/jobStore_{workflow_name}', # NOTE: This is the equivalent of --cachedir
395399
'--clean', 'always', # This effectively disables caching, but is reproducible
396400
'--disableProgress', # disable the progress bar in the terminal, saves UI cycle
397401
'--workDir', '/data1',
@@ -451,7 +455,7 @@ def run_cwl_workflow(workflow_name: str, basepath: str, cwl_runner: str, contain
451455
print(e) # we are always running this on CI
452456
# only copy output files if using cwltool
453457
if cwl_runner == 'cwltool':
454-
copy_output_files(workflow_name)
458+
copy_output_files(workflow_name, basepath=basepath)
455459
return retval
456460

457461

@@ -468,11 +472,11 @@ async def run_cwl_serialized_async(workflow: Json, basepath: str,
468472
env_commands (List[str]): environment variables and commands needed to be run before running the workflow
469473
"""
470474
workflow_name = workflow['name']
475+
basepath = basepath.rstrip("/") if basepath != "/" else basepath
471476
output_dirs = pc.find_output_dirs(workflow)
472477
pc.create_output_dirs(output_dirs, basepath)
473478
compiled_cwl = workflow_name + '.cwl'
474479
inputs_yml = workflow_name + '_inputs.yml'
475-
basepath = basepath.rstrip("/") if basepath != "/" else basepath
476480
# write _input.yml file
477481
with open(Path(basepath) / inputs_yml, 'w', encoding='utf-8') as f:
478482
yaml.dump(workflow['yaml_inputs'], f)

0 commit comments

Comments
 (0)