Skip to content

Commit a45ed17

Browse files
committed
Mostly cleanup
A few cleanup changes in preparation for the next release; this commit also adds read_lines back into fileio. It turns out rcha_metab uses it, and it is easier to keep it in the mwtab package.
1 parent 240e5c0 commit a45ed17

File tree

11 files changed

+264
-280
lines changed

11 files changed

+264
-280
lines changed

CHANGELOG.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
Release History
22
===============
33

4-
1.2.6
5-
~~~~
4+
2.0.0
5+
~~~~~
66
-Can now read duplicate keys in "Additional sample data" and reproduce it in write, will validate it as an error.
77
-Writing out now ensures correct key ordering for JSON.
88
-Validation now validates the main sections not just the internals of them.
99
-Validate now checks that metabolites in the Data section are in the Metabolites section and vice versa.
1010
-Batch processing from the command line is more fault tolerant and won't stop the batch for 1 bad file.
11+
-Improved tokenizer so more files can be read in without error.
1112
-Changed schema validation to use jsonschema instead of schema.
1213
-Added validations for METABOLITES columns that try to give warnings for bad values, for example 'kegg_id' column should all be something like C00000.
1314
-Expanded the standard column name functionality to look for many more column names than in the previous version and do it in a much more robust way.

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
The Clear BSD License
22

3-
Copyright (c) 2020, Christian D. Powell, Andrey Smelter, Hunter N.B. Moseley
3+
Copyright (c) 2025, P. Travis Thompson, Christian D. Powell, Andrey Smelter, Hunter N.B. Moseley
44
All rights reserved.
55

66
Redistribution and use in source and binary forms, with or without

docs/todo.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
TODO
22
====
33

4+
Add options to the validate CLI command and to the validate method in mwtab to save out the new JSON file.
5+
46
Add --limit or --ignore option to validate to filter out certain types of errors/warnings. Need to first create some classifications to tag them with.
57

68
Think about extending METABOLITES and EXTENDED blocks with an "Attributes" line like "Factors" in DATA block as a way to add more information about the columns themselves.

src/mwtab/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
def main():
1111
doc = [line for line in cli.__doc__.split('\n')]
1212
doc = doc[:3] + [line.lstrip() for line in doc[5:]]
13-
doc = doc.join('\n')
13+
doc = '\n'.join(doc)
1414
args = docopt.docopt(cli.__doc__, version=__version__)
1515
cli.cli(args)
1616

src/mwtab/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
Documentation webpage: https://moseleybioinformaticslab.github.io/mwtab/
5050
GitHub webpage: https://github.com/MoseleyBioinformaticsLab/mwtab
5151
"""
52+
## TODO add options to validate to save out the new JSON.
5253

5354
from os import getcwd
5455
from os.path import join, isfile

src/mwtab/fileio.py

Lines changed: 20 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@
2727
from functools import partial
2828

2929
from . import mwtab
30-
from . import validator
31-
from . import mwschema
3230
from . import mwrest
3331

3432
from urllib.request import urlopen
@@ -195,123 +193,27 @@ def read_with_class(sources: str|list[str], read_class: type, class_kwds: dict,
195193
read_files = partial(read_with_class, read_class = mwtab.MWTabFile, class_kwds = {"duplicate_keys": True})
196194
read_mwrest = partial(read_with_class, read_class = mwrest.MWRESTFile, class_kwds = {})
197195

198-
199-
# TODO delete this functions after testing.
200-
# def read_files(sources, return_exceptions=False):
201-
# """Construct a generator that yields file instances.
202-
203-
# :param sources: One or more strings representing path to file(s).
204-
# :param bool return_exceptions: Whether to yield a tuple with file instance and exception or just the file instance.
205-
# """
206-
# try:
207-
# filenames = _generate_filenames(sources, True)
208-
# filehandles = _generate_handles(filenames, True)
209-
# except Exception as e:
210-
# yield _return_correct_yield(None,
211-
# exception=e,
212-
# return_exceptions=return_exceptions)
213-
# for fh, source, exc in filehandles:
214-
# if exc is not None:
215-
# yield _return_correct_yield(source,
216-
# exception=exc,
217-
# return_exceptions=return_exceptions)
218-
# continue
219-
# try:
220-
# f = mwtab.MWTabFile(source, duplicate_keys=True)
221-
# f.read(fh)
222-
# fh.close()
223-
224-
# if VERBOSE:
225-
# print("Processed file: {}".format(os.path.abspath(source)))
226-
227-
# yield _return_correct_yield(f,
228-
# exception=None,
229-
# return_exceptions=return_exceptions)
230-
231-
# except Exception as e:
232-
# fh.close()
233-
# if VERBOSE:
234-
# print("Error processing file: ", os.path.abspath(source), "\nReason:", e)
235-
# yield _return_correct_yield(source,
236-
# exception=e,
237-
# return_exceptions=return_exceptions)
238-
239-
240-
# def read_mwrest(*sources, return_exceptions=False):
241-
# """Construct a generator that yields file instances.
242-
243-
# :param sources: One or more strings representing path to file(s).
244-
# :param bool return_exceptions: Whether to yield a tuple with file instance and exception or just the file instance.
245-
# """
246-
# try:
247-
# filenames = _generate_filenames(sources, True)
248-
# filehandles = _generate_handles(filenames, True)
249-
# except Exception as e:
250-
# yield _return_correct_yield(None,
251-
# exception=e,
252-
# return_exceptions=return_exceptions)
253-
# for fh, source, exc in filehandles:
254-
# try:
255-
# f = mwrest.MWRESTFile(source)
256-
# f.read(fh)
257-
# fh.close()
258-
259-
# if VERBOSE:
260-
# print("Processed url: {}".format(source))
261-
262-
# yield _return_correct_yield(f,
263-
# exception=None,
264-
# return_exceptions=return_exceptions)
265-
266-
# except Exception as e:
267-
# fh.close()
268-
# if VERBOSE:
269-
# print("Error processing url: ", source, "\nReason:", e)
270-
# yield _return_correct_yield(None,
271-
# exception=e,
272-
# return_exceptions=return_exceptions)
273-
274-
# Unused function, leaving here for now.
275-
# def read_lines(*sources, return_exceptions=False):
276-
# """Construct a generator that yields file instances.
277-
278-
# :param sources: One or more strings representing path to file(s).
279-
# :param bool return_exceptions: Whether to yield a tuple with file instance and exception or just the file instance.
280-
# """
281-
# try:
282-
# filenames = _generate_filenames(sources, True)
283-
# filehandles = _generate_handles(filenames, True)
284-
# except Exception as e:
285-
# yield _return_correct_yield(None,
286-
# exception=e,
287-
# return_exceptions=return_exceptions)
288-
# for fh, source, exc in filehandles:
289-
# try:
290-
# string = fh.read()
291-
# fh.close()
292-
# if isinstance(string, str):
293-
# lines = string.replace("\r", "\n").split("\n")
294-
# elif isinstance(string, bytes):
295-
# lines = string.decode("utf-8").replace("\r", "\n").split("\n")
296-
# else:
297-
# raise TypeError("Expecting <class 'str'> or <class 'bytes'>, but {} was passed".format(type(string)))
298-
299-
# lines = [line for line in lines if line]
300-
301-
# if VERBOSE:
302-
# print("Processed file: {}".format(os.path.abspath(source)))
303-
304-
# yield _return_correct_yield((lines, source),
305-
# exception=None,
306-
# return_exceptions=return_exceptions)
196+
class ReadLines():
197+
def __init__(self, source, *args, **kwargs):
198+
self.source = source
199+
self.lines = []
200+
201+
def read(self, filehandle):
202+
"""
203+
"""
204+
string = filehandle.read()
205+
filehandle.close()
206+
if isinstance(string, str):
207+
lines = string.replace("\r", "\n").split("\n")
208+
elif isinstance(string, bytes):
209+
lines = string.decode("utf-8").replace("\r", "\n").split("\n")
210+
else:
211+
raise TypeError("Expecting <class 'str'> or <class 'bytes'>, but {} was passed".format(type(string)))
307212

308-
# except Exception as e:
309-
# fh.close()
310-
# if VERBOSE:
311-
# print("Error processing file: ", source, "\nReason:", e)
312-
# yield _return_correct_yield(source,
313-
# exception=e,
314-
# return_exceptions=return_exceptions)
213+
self.lines = [line for line in lines if line]
214+
215+
216+
read_lines = partial(read_with_class, read_class = ReadLines, class_kwds = {})
315217

316218

317219
class GenericFilePath(object):

src/mwtab/mwextract.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,6 @@ def write_metadata_csv(to_path, extracted_values, no_header=False):
166166
:return: None
167167
:rtype: :py:obj:`None`
168168
"""
169-
# if not os.path.exists(os.path.dirname(os.path.splitext(to_path)[0])):
170-
# dirname = os.path.dirname(to_path)
171-
# if dirname:
172-
# os.makedirs(dirname)
173169
fileio._create_save_path(to_path)
174170

175171
if not os.path.splitext(to_path)[1]:
@@ -219,10 +215,6 @@ def write_metabolites_csv(to_path, extracted_values, no_header=False):
219215
num_samples
220216
])
221217

222-
# if not os.path.exists(os.path.dirname(os.path.splitext(to_path)[0])):
223-
# dirname = os.path.dirname(to_path)
224-
# if dirname:
225-
# os.makedirs(dirname)
226218
fileio._create_save_path(to_path)
227219

228220
if not os.path.splitext(to_path)[1]:
@@ -286,10 +278,6 @@ def write_json(to_path, extracted_dict):
286278
:return: None
287279
:rtype: :py:obj:`None`
288280
"""
289-
# if not os.path.exists(os.path.dirname(os.path.splitext(to_path)[0])):
290-
# dirname = os.path.dirname(to_path)
291-
# if dirname:
292-
# os.makedirs(dirname)
293281
fileio._create_save_path(to_path)
294282

295283
if not os.path.splitext(to_path)[1]:

src/mwtab/mwrest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def generate_mwtab_urls(input_items, base_url=BASE_URL, output_format='txt', ret
137137
return_exceptions=return_exceptions)
138138

139139

140-
# Unused funciton. Leaving here for now.
140+
# Unused function. Leaving here for now.
141141
# def generate_urls(input_items, base_url=BASE_URL, return_exceptions=False, **kwds):
142142
# """
143143
# Method for creating a generator which yields validated Metabolomics Workbench REST urls.

src/mwtab/mwtab.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -348,14 +348,16 @@ def get_metabolites_data_as_pandas(self):
348348
"""
349349
return self.get_table_as_pandas('Data')
350350

351-
def validate(self, ms_schema = ms_required_schema, nmr_schema = nmr_required_schema, verbose = True):
351+
def validate(self, ms_schema: dict = ms_required_schema, nmr_schema: dict = nmr_required_schema, verbose: bool = True) -> (str, list[dict]):
352352
"""Validate the instance.
353353
354-
:param dict ms_schema: jsonschema to validate both the base parts of the file and the MS specific parts of the file.
355-
:param dict nmr_schema: jsonschema to validate both the base parts of the file and the NMR specific parts of the file.
356-
:param bool verbose: whether to be verbose or not.
357-
:return: Validated file and errors if verbose is False.
358-
:rtype: :py:class:`~mwtab.mwtab.MWTabFile`, _io.StringIO
354+
Args:
355+
ms_schema: jsonschema to validate both the base parts of the file and the MS specific parts of the file.
356+
nmr_schema: jsonschema to validate both the base parts of the file and the NMR specific parts of the file.
357+
verbose: whether to be verbose or not.
358+
359+
Returns:
360+
Error messages as a single string and error messages in JSON form. If verbose is True, then the single string will be None.
359361
"""
360362
return validate_file(
361363
mwtabfile=self,

0 commit comments

Comments
 (0)