Skip to content

Commit 28c6f7c

Browse files
committed
Bug fixes
Some small bug fixes to make some previous major changes work.
1 parent 4dc1587 commit 28c6f7c

File tree

5 files changed

+93
-58
lines changed

5 files changed

+93
-58
lines changed

src/mwtab/cli.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,8 @@ def cli(cmdargs):
231231
converter = Converter(from_path=cmdargs["<from-path>"],
232232
to_path=cmdargs["<to-path>"],
233233
from_format=cmdargs["--from-format"],
234-
to_format=cmdargs["--to-format"])
234+
to_format=cmdargs["--to-format"],
235+
force=force)
235236
converter.convert()
236237

237238
# mwtab validate ...

src/mwtab/converter.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@
118118
class Translator(object):
119119
"""Translator abstract class."""
120120

121-
def __init__(self, from_path, to_path, from_format=None, to_format=None):
121+
def __init__(self, from_path, to_path, from_format=None, to_format=None, force=False):
122122
"""Translator initializer.
123123
:param str from_path: Path to input file(s).
124124
:param str to_path: Path to output file(s).
@@ -131,6 +131,7 @@ def __init__(self, from_path, to_path, from_format=None, to_format=None):
131131
self.to_format = to_format
132132
self.from_path_compression = fileio.GenericFilePath.is_compressed(from_path)
133133
self.to_path_compression = fileio.GenericFilePath.is_compressed(to_path)
134+
self.force = force
134135

135136
def __iter__(self):
136137
"""Abstract iterator must be implemented in a subclass."""
@@ -264,6 +265,9 @@ def _to_dir(self, file_generator):
264265
print("Something went wrong when trying to convert " + f.source)
265266
traceback.print_exception(e, file=sys.stdout)
266267
print()
268+
269+
if os.path.exists(outpath):
270+
os.remove(outpath)
267271

268272
def _to_zipfile(self, file_generator):
269273
"""Convert files to zip archive.
@@ -344,14 +348,18 @@ def _to_textfile(self, file_generator):
344348
if file_generator.to_path.endswith(file_generator.file_extension[file_generator.to_format]) \
345349
else file_generator.to_path + file_generator.file_extension[file_generator.to_format]
346350

347-
with open(to_path, mode="w", encoding="utf-8") as outfile:
348-
for f in file_generator:
349-
try:
351+
352+
for f in file_generator:
353+
try:
354+
with open(to_path, mode="w", encoding="utf-8") as outfile:
350355
outfile.write(f.writestr(file_generator.to_format))
351-
except Exception as e:
352-
print("Something went wrong when trying to convert " + f.source)
353-
traceback.print_exception(e, file=sys.stdout)
354-
print()
356+
except Exception as e:
357+
print("Something went wrong when trying to convert " + f.source)
358+
traceback.print_exception(e, file=sys.stdout)
359+
print()
360+
361+
if os.path.exists(to_path):
362+
os.remove(to_path)
355363

356364
def _output_path(self, input_path, to_format, archive=False):
357365
"""Construct an output path string from an input path string.

src/mwtab/duplicates_dict.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ def __contains__(self, key):
6464
def __eq__(self, compare):
6565
return self.data == compare
6666

67+
def __ne__(self, compare):
68+
return self.data != compare
69+
6770
def __repr__(self):
6871
return self.data.__repr__().replace('OrderedDict', 'DuplicatesDict')
6972

src/mwtab/mwtab.py

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -581,8 +581,8 @@ def _build_block(self, name, lexer):
581581
self._raw_binned_headers = token_value
582582
self._raw_samples = self._raw_binned_headers
583583
if token.key == "Bin range(ppm)":
584-
self._binned_header = token_value[1:]
585-
self._samples = self._binned_header
584+
# self._binned_header = token_value[1:]
585+
# self._samples = self._binned_header
586586
is_header = True
587587
# Have seen Factors section in incorrect sections such as METABOLITES,
588588
# and seen multiple Factors sections in a single METABOLITE_DATA section.
@@ -602,24 +602,24 @@ def _build_block(self, name, lexer):
602602
if loop_count < 1:
603603
self._raw_samples = token_value
604604
# The last check for len(token_value) == 1 is for ones like AN000788.
605-
if self._samples is None and \
606-
(any(sample in ssf_samples for sample in token_value[1:]) or \
607-
(len(token_value) == 1 and token_value[0] in ['Samples', 'metabolite name', 'metabolite_name'])):
608-
self._samples = token_value[1:]
605+
if (any(sample in ssf_samples for sample in token_value[1:]) or \
606+
(len(token_value) == 1 and token_value[0] in ['Samples', 'metabolite name', 'metabolite_name']) or \
607+
(len(ssf_samples) == 0 and token_value[0] in ['Samples', 'metabolite name', 'metabolite_name'])):
608+
# self._samples = token_value[1:]
609609
is_header = True
610610

611611
elif "METABOLITES" in section_name and loop_count < 2:
612612
if loop_count < 1:
613613
self._raw_metabolite_header = token_value
614614
if token.key.lower() == "metabolite_name":
615-
self._metabolite_header = token_value[1:]
615+
# self._metabolite_header = token_value[1:]
616616
is_header = True
617617

618618
elif "EXTENDED" in section_name and loop_count < 2:
619619
if loop_count < 1:
620620
self._raw_extended_metabolite_header = token_value
621621
if token.key.lower() == "metabolite_name":
622-
self._extended_metabolite_header = token_value[1:]
622+
# self._extended_metabolite_header = token_value[1:]
623623
is_header = True
624624

625625

@@ -666,12 +666,30 @@ def _build_block(self, name, lexer):
666666
token = next(lexer)
667667
loop_count += 1
668668

669+
# This makes it so all dictionaries have the same number of values.
670+
# Let's say row 3 looks like {'Metabolite': 'asdf', 'col1': 'qwer', '': 2345}
671+
# The rows above don't have the '' entry; this code makes it so they do.
672+
if self._duplicate_keys:
673+
data = [duplicates_dict.data for duplicates_dict in data]
674+
data_df = pandas.DataFrame.from_records(data).fillna('').astype(str)
675+
data = data_df.to_dict(orient='records')
676+
if self._duplicate_keys:
677+
data = [DuplicatesDict(data_dict) for data_dict in data]
678+
min_header = [column if not column.endswith('}}}') else re.match(DUPLICATE_KEY_REGEX, column).group(1)
679+
for column in data_df.columns]
680+
min_header = min_header[1:] if min_header[1:] else None
681+
669682
if token.key.startswith("METABOLITES"):
670683
section["Metabolites"] = data
684+
self._metabolite_header = min_header
671685
elif token.key.startswith("EXTENDED_"):
672686
section["Extended"] = data
687+
self._extended_metabolite_header = min_header
673688
else:
674689
section["Data"] = data
690+
self._samples = min_header
691+
if "BINNED_DATA" in section_name:
692+
self._binned_header = min_header
675693

676694
elif token.key.endswith("_RESULTS_FILE"):
677695
key, results_file_dict = token
@@ -716,8 +734,11 @@ def print_file(self, f=sys.stdout, file_format="mwtab"):
716734
print("#NMR", file=f)
717735
else:
718736
print("#{}".format(key), file=f)
719-
720-
self.print_block(key, f=f, file_format=file_format)
737+
738+
if isinstance(self[key], dict):
739+
self.print_block(key, f=f, file_format=file_format)
740+
else:
741+
raise TypeError(f'Key/section "{key}" is not a dictionary. It cannot be translated to the mwTab format.')
721742
print("#END", file=f)
722743

723744
elif file_format == "json":

0 commit comments

Comments
 (0)