
Commit ca6b638

Author: Alan Christie (committed)

- Further work restructuring
- Preparing for version 2.2.0 - Adds a few simple file-based tests

1 parent: 9786842

File tree: 6 files changed (+87, -55 lines)


src/python/pipelines_utils/BasicObjectWriter.py

Lines changed: 0 additions & 2 deletions
@@ -50,5 +50,3 @@ def write(self, dictOfValues, objectUUID=None):
     def close(self):
         if self.file:
             self.file.close()
-
-

src/python/pipelines_utils/TsvWriter.py

Lines changed: 0 additions & 4 deletions
@@ -50,7 +50,3 @@ def writeFooter(self):
     def close(self):
         if self.file:
             self.file.close()
-
-
-
-

src/python/pipelines_utils/parameter_utils.py

Lines changed: 25 additions & 0 deletions
@@ -21,6 +21,31 @@
 """
 
 
+def add_default_input_args(parser):
+    parser.add_argument('-i', '--input',
+                        help="Input file, if not defined the STDIN is used")
+    parser.add_argument('-if', '--informat', choices=['sdf', 'json'],
+                        help="Input format."
+                             " When using STDIN this must be specified.")
+
+
+def add_default_output_args(parser):
+    parser.add_argument('-o', '--output',
+                        help="Base name for output file (no extension)."
+                             " If not defined then SDTOUT is used for the"
+                             " structures and output is used as base name"
+                             " of the other files.")
+    parser.add_argument('-of', '--outformat', choices=['sdf', 'json'],
+                        help="Output format. Defaults to 'sdf'.")
+    parser.add_argument('--meta', action='store_true',
+                        help='Write metadata and metrics files')
+
+
+def add_default_io_args(parser):
+    add_default_input_args(parser)
+    add_default_output_args(parser)
+
+
 def splitValues(textStr):
     """Splits a comma-separated number sequence into a list (of floats).
     """

src/python/pipelines_utils/utils.py

Lines changed: 17 additions & 46 deletions
@@ -14,57 +14,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 from __future__ import print_function
 import sys, gzip, json, uuid
 from math import log10, floor
 from pipelines_utils.BasicObjectWriter import BasicObjectWriter
 from pipelines_utils.TsvWriter import TsvWriter
 
+
 def log(*args, **kwargs):
-    """
-    Log output to STDERR
+    """Log output to STDERR
     """
     print(*args, file=sys.stderr, **kwargs)
 
+
 def round_sig(x, sig):
     """Round the number to the specified number of significant figures"""
-    return round(x, sig - int(floor(log10(abs(x))))-1)
-
-
-def add_default_input_args(parser):
-    parser.add_argument('-i', '--input', help="Input file, if not defined the STDIN is used")
-    parser.add_argument('-if', '--informat', choices=['sdf', 'json'], help="Input format. When using STDIN this must be specified.")
-
-
-def add_default_output_args(parser):
-    parser.add_argument('-o', '--output', help="Base name for output file (no extension). If not defined then SDTOUT is used for the structures and output is used as base name of the other files.")
-    parser.add_argument('-of', '--outformat', choices=['sdf', 'json'], help="Output format. Defaults to 'sdf'.")
-    parser.add_argument('--meta', action='store_true', help='Write metadata and metrics files')
-
-
-def add_default_io_args(parser):
-    add_default_input_args(parser)
-    add_default_output_args(parser)
-
-
-def default_open_input_output(inputDef, inputFormat, outputDef, defaultOutput, outputFormat, thinOutput=False, valueClassMappings=None,
-                              datasetMetaProps=None, fieldMetaProps=None):
-    """Default approach to handling the inputs and outputs"""
-    input, suppl = default_open_input(inputDef, inputFormat)
-    output, writer, outputBase = default_open_output(outputDef, defaultOutput, outputFormat, thinOutput=thinOutput,
-                                                     valueClassMappings=valueClassMappings, datasetMetaProps=datasetMetaProps, fieldMetaProps=fieldMetaProps)
-    return input, output, suppl, writer, outputBase
-
-
-def default_open_input(inputDef, inputFormat):
-    if not inputDef and not inputFormat:
-        raise ValueError('Must specify either an input file name or an input format (or both)')
-    elif inputFormat == 'sdf' or (inputDef and (inputDef.lower().endswith('.sdf') or inputDef.lower().endswith('.sdf.gz'))):
-        input, suppl = default_open_input_sdf(inputDef)
-    elif inputFormat == 'json' or (inputDef and (inputDef.lower().endswith('.data') or inputDef.lower().endswith('.data.gz'))):
-        input, suppl = default_open_input_json(inputDef)
-    else:
-        raise ValueError('Unsupported input format')
-
-    return input, suppl
+    return round(x, sig - int(floor(log10(abs(x)))) - 1)
 
 
 def open_file(filename):
@@ -75,18 +40,20 @@ def open_file(filename):
         return open(filename, 'r')
 
 
-def create_simple_writer(outputDef, defaultOutput, outputFormat, fieldNames, compress=True, valueClassMappings=None, datasetMetaProps=None, fieldMetaProps=None):
-    """Create a simple writer suitable for writing flat data e.g. as BasicObject or TSV"""
+def create_simple_writer(outputDef, defaultOutput, outputFormat, fieldNames,
+                         compress=True, valueClassMappings=None,
+                         datasetMetaProps=None, fieldMetaProps=None):
+    """Create a simple writer suitable for writing flat data
+    e.g. as BasicObject or TSV."""
 
     if not outputDef:
         outputBase = defaultOutput
     else:
         outputBase = outputDef
 
     if outputFormat == 'json':
-
-        write_squonk_datasetmetadata(outputBase, True, valueClassMappings, datasetMetaProps, fieldMetaProps)
-
+        write_squonk_datasetmetadata(outputBase, True, valueClassMappings,
+                                     datasetMetaProps, fieldMetaProps)
         return BasicObjectWriter(open_output(outputDef, 'data', compress)), outputBase
 
     elif outputFormat == 'tsv':
@@ -95,6 +62,7 @@ def create_simple_writer(outputDef, defaultOutput, outputFormat, fieldNames, com
     else:
         raise ValueError("Unsupported format: " + outputFormat)
 
+
 def open_output(basename, ext, compress):
     if basename:
         fname = basename + '.' + ext
@@ -110,6 +78,7 @@ def open_output(basename, ext, compress):
     else:
         return sys.stdout
 
+
 def write_squonk_datasetmetadata(outputBase, thinOutput, valueClassMappings, datasetMetaProps, fieldMetaProps):
     """This is a temp hack to write the minimal metadata that Squonk needs.
     Will needs to be replaced with something that allows something more complete to be written.
@@ -150,7 +119,8 @@ def write_squonk_datasetmetadata(outputBase, thinOutput, valueClassMappings, dat
 def write_metrics(baseName, values):
     """Write the metrics data
 
-    :param baseName: The base name of the output files. e.g. extensions will be appended to this base name
+    :param baseName: The base name of the output files.
+        e.g. extensions will be appended to this base name
     :param values dictionary of values to write
     """
     m = open(baseName + '_metrics.txt', 'w')
@@ -161,7 +131,8 @@ def write_metrics(baseName, values):
 
 
 def generate_molecule_object_dict(source, format, values):
-    """Generate a dictionary that represents a Squonk MoleculeObject when writen as JSON
+    """Generate a dictionary that represents a Squonk MoleculeObject when
+    writen as JSON
 
     :param source: Molecules in molfile or smiles format
     :param format: The format of the molecule. Either 'mol' or 'smiles'
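
To make the reshaped module easier to follow, here is a rough sketch of how create_simple_writer() and write_metrics() are typically combined. The output base name is illustrative; the write()/close() calls follow the BasicObjectWriter interface shown above:

    from pipelines_utils import utils

    # 'json' output also writes the Squonk dataset metadata file
    # ('results.metadata') before the writer is returned.
    writer, output_base = utils.create_simple_writer('results', None, 'json', None)
    writer.write({'name': 'example', 'score': 1.23})
    writer.close()

    # Writes simple key=value pairs to 'results_metrics.txt'.
    utils.write_metrics(output_base, {'processed': '1'})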

src/python/setup.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ def get_long_description():
 setup(
 
     name='im-pipelines-utils',
-    version='2.1.0',
+    version='2.2.0',
     author='Alan Christie',
     author_email='[email protected]',
     url='https://github.com/InformaticsMatters/pipelines-utils',

src/python/test/pipelines_utils/test_utils.py

Lines changed: 44 additions & 2 deletions
@@ -1,5 +1,7 @@
 import unittest
 
+import os
+
 from pipelines_utils import utils
 
 
@@ -19,11 +21,51 @@ def test_round_sig(self):
     def test_create_simple_writer_for_json(self):
         """Just check the very basics
         """
-        writer = utils.create_simple_writer('experiment', None, 'json', None)
+        json_base = 'test_json'
+        json_files = [json_base + '.metadata', json_base + '.data.gz']
+
+        writer = utils.create_simple_writer(json_base, None, 'json', None)
+
         self.assertTrue(writer is not None)
+        # Check files and clean-up
+        for json_file in json_files:
+            self.assertTrue(os.path.exists(json_file))
+            os.remove(json_file)
 
     def test_create_simple_writer_for_tsv(self):
         """Just check the very basics
         """
-        writer = utils.create_simple_writer('experiment', None, 'tsv', None)
+        tsv_base = 'test_tsv'
+        tsv_files = [tsv_base + '.tsv.gz']
+
+        writer = utils.create_simple_writer('test_tsv', None, 'tsv', None)
+
         self.assertTrue(writer is not None)
+        # Check files and clean-up
+        for tsv_file in tsv_files:
+            self.assertTrue(os.path.exists(tsv_file))
+            os.remove(tsv_file)
+
+    def test_write_metrics(self):
+        """Checks metrics are written.
+        """
+        m_base = 'test_utils'
+        m_filename = m_base + '_metrics.txt'
+
+        utils.write_metrics('test_utils', {'key_a': 'value_a',
+                                           'key_b': 'value_b'})
+
+        # Read back...
+        self.assertTrue(os.path.exists(m_filename))
+        metrics = open(m_filename, 'r')
+        found_a = False
+        found_b = False
+        for line in metrics.readlines():
+            if line.strip() == 'key_a=value_a':
+                found_a = True
+            elif line.strip() == 'key_b=value_b':
+                found_b = True
+        metrics.close()
+        os.remove(m_filename)
+        self.assertTrue(found_a)
+        self.assertTrue(found_b)
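
The new tests create and then remove real files in the working directory, so they can be exercised with the stock unittest machinery. A sketch, assuming it is run from src/python so that both pipelines_utils and the test directory are importable:

    import unittest

    # Discover and run everything under the 'test' directory.
    suite = unittest.defaultTestLoader.discover('test')
    unittest.TextTestRunner(verbosity=2).run(suite)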
