Skip to content

Commit 13d02d3

Browse files
author
a.b.christie
committed
refactor: New concat behaviour (& workflow)
1 parent e21a410 commit 13d02d3

File tree

3 files changed

+31
-23
lines changed

3 files changed

+31
-23
lines changed

concatenator.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55
from dm_job_utilities.dm_log import DmLog
66

77

8-
def find_files(files_glob):
9-
files = glob.glob(files_glob)
8+
def find_files(input_file, dirs_glob):
9+
10+
files = glob.glob(f"{dirs_glob}/{input_file}")
1011
# Report the pattern actually searched; 'files_glob' is no longer a parameter of find_files().
DmLog.emit_event("Found {} files using {}".format(len(files), f"{dirs_glob}/{input_file}"))
1112
return files
1213

1314

14-
def concat_binary(files_glob, output):
15-
files = find_files(files_glob)
15+
def concat_binary(input_file, dirs_glob, output):
16+
files = find_files(input_file, dirs_glob)
1617
with (open(output, 'wb') as outfile):
1718
file_count = 0
1819
for file in files:
@@ -23,8 +24,8 @@ def concat_binary(files_glob, output):
2324
DmLog.emit_event("Wrote {} files".format(file_count))
2425

2526

26-
def concat_text(files_glob, header, output):
27-
files = find_files(files_glob)
27+
def concat_text(input_file, dirs_glob, header, output):
28+
files = find_files(input_file, dirs_glob)
2829
output_count = 0
2930
with (open(output, 'w') as outfile):
3031
file_count = 0
@@ -48,18 +49,16 @@ def concat_text(files_glob, header, output):
4849
def main():
4950

5051
# Examples:
51-
# python -m concatenator -f "*.sdf"
52-
# python -m concatenator -f "abcd*/output.sdf"
53-
# python -m concatenator -f "*.smi" --header ignore
54-
# python -m concatenator -f "*.bin" --binary
52+
# python -m concatenator -f "output.sdf" -d "input-*"
5553
#
56-
# NOTE: that if using globs for the files argument this must be escaped (e.g. abcd\*) or put in
54+
# NOTE: when using globs (e.g. for the -d argument) they must be escaped (e.g. abcd\*) or put in
5755
# quotes (e.g. "abcd*") so that they are not expanded by the shell.
5856
# NOTE: when using the --binary argument the --header argument is ignored.
5957

6058
# command line args definitions #########################################
6159
parser = argparse.ArgumentParser(description='Concatenate files')
62-
parser.add_argument('-f', '--files', required=True, help="Name(s) of files to look for (glob allowed)")
60+
parser.add_argument('-f', '--input-file', required=True, help="Name of the file to concatenate")
61+
parser.add_argument('-d', '--dirs-glob', required=True, help="Glob of directories to search")
6362
parser.add_argument('-o', '--output', required=True, help="Name(s) of output file")
6463
parser.add_argument('--header', choices=["ignore", "retain"],
6564
help="Files have a header line, and what to do with it. If 'retain' the header of the first file is retained")
@@ -69,9 +68,9 @@ def main():
6968
DmLog.emit_event("Concatenate files: ", args)
7069

7170
if args.binary:
72-
concat_binary(args.files, args.output)
71+
concat_binary(args.input_file, args.dirs_glob, args.output)
7372
else:
74-
concat_text(args.files, args.header, args.output)
73+
concat_text(args.input_file, args.dirs_glob, args.header, args.output)
7574

7675

7776
if __name__ == "__main__":

data-manager/file-utils.yaml

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ jobs:
7979
concatenator:
8080
name: Concatenate files
8181
description: >-
82-
Takes a number of input files and concatenates them into a single output file
82+
Takes the name of an input file (expected to be in a number of directories)
83+
and concatenates each instance found into a single output file
8384
version: '1.0.0'
8485
category: file utils
8586
keywords:
@@ -93,31 +94,36 @@ jobs:
9394
working-directory: /data
9495
fix-permissions: true
9596
command: >-
96-
python concatenator.py -f '{{ filesGlob }}' -d '{{ dirsGlob }}' {{ outputFile }}
97+
python concatenator.py -f '{{ inputFile }}' -d '{{ dirsGlob }}' -o {{ outputFile }}
9798
variables:
9899
order:
99100
options:
100-
- filesGlob
101101
- outputFile
102102
- header
103103
- binary
104+
inputs:
105+
type: object
106+
required:
107+
- inputFile
108+
properties:
109+
inputFile:
110+
title: SDFile to combine
111+
mime-types:
112+
- chemical/x-mdl-sdfile
113+
type: files
104114
outputs:
105115
type: object
106116
properties:
107117
outputFile:
108118
title: Output files
109119
creates: '{{ outputFile }}'
110-
type: files
120+
type: file
111121
options:
112122
type: object
113123
required:
114-
- filesGlob
124+
- dirsGlob
115125
- outputFile
116126
properties:
117-
filesGlob:
118-
title: Input files
119-
type: string
120-
pattern: "^[A-Za-z0-9_/\\.\\-\\?\\*]+$"
121127
dirsGlob:
122128
title: Dirs to search
123129
type: string

data-manager/workflow-docking.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,6 @@ steps:
9494
- variable: outputFile
9595
from-workflow:
9696
variable: outputFile
97+
- variable: dirsGlob
98+
from-predefined:
99+
variable: instance-link-glob

0 commit comments

Comments
 (0)