@@ -581,8 +581,8 @@ def _build_block(self, name, lexer):
581581 self ._raw_binned_headers = token_value
582582 self ._raw_samples = self ._raw_binned_headers
583583 if token .key == "Bin range(ppm)" :
584- self ._binned_header = token_value [1 :]
585- self ._samples = self ._binned_header
584+ # self._binned_header = token_value[1:]
585+ # self._samples = self._binned_header
586586 is_header = True
587587 # Have seen Factors section in incorrect sections such as METABOLITES,
588588 # and seen multiple Factors sections in a single METABOLITE_DATA section.
@@ -602,24 +602,24 @@ def _build_block(self, name, lexer):
602602 if loop_count < 1 :
603603 self ._raw_samples = token_value
604604 # The check for len(token_value) == 1 is for ones like AN000788.
605- if self . _samples is None and \
606- (any ( sample in ssf_samples for sample in token_value [ 1 : ]) or \
607- (len (token_value ) == 1 and token_value [0 ] in ['Samples' , 'metabolite name' , 'metabolite_name' ])):
608- self ._samples = token_value [1 :]
605+ if ( any ( sample in ssf_samples for sample in token_value [ 1 :]) or \
606+ (len ( token_value ) == 1 and token_value [ 0 ] in [ 'Samples' , 'metabolite name' , 'metabolite_name' ]) or \
607+ (len (ssf_samples ) == 0 and token_value [0 ] in ['Samples' , 'metabolite name' , 'metabolite_name' ])):
608+ # self._samples = token_value[1:]
609609 is_header = True
610610
611611 elif "METABOLITES" in section_name and loop_count < 2 :
612612 if loop_count < 1 :
613613 self ._raw_metabolite_header = token_value
614614 if token .key .lower () == "metabolite_name" :
615- self ._metabolite_header = token_value [1 :]
615+ # self._metabolite_header = token_value[1:]
616616 is_header = True
617617
618618 elif "EXTENDED" in section_name and loop_count < 2 :
619619 if loop_count < 1 :
620620 self ._raw_extended_metabolite_header = token_value
621621 if token .key .lower () == "metabolite_name" :
622- self ._extended_metabolite_header = token_value [1 :]
622+ # self._extended_metabolite_header = token_value[1:]
623623 is_header = True
624624
625625
@@ -666,12 +666,30 @@ def _build_block(self, name, lexer):
666666 token = next (lexer )
667667 loop_count += 1
668668
669+ # This makes it so all dictionaries have the same number of values.
670+ # Let's say row 3 looks like {'Metabolite': 'asdf', 'col1': 'qwer', '': 2345}
671+ # The rows above don't have the '' entry, this code makes it so they do.
672+ if self ._duplicate_keys :
673+ data = [duplicates_dict .data for duplicates_dict in data ]
674+ data_df = pandas .DataFrame .from_records (data ).fillna ('' ).astype (str )
675+ data = data_df .to_dict (orient = 'records' )
676+ if self ._duplicate_keys :
677+ data = [DuplicatesDict (data_dict ) for data_dict in data ]
678+ min_header = [column if not column .endswith ('}}}' ) else re .match (DUPLICATE_KEY_REGEX , column ).group (1 )
679+ for column in data_df .columns ]
680+ min_header = min_header [1 :] if min_header [1 :] else None
681+
669682 if token .key .startswith ("METABOLITES" ):
670683 section ["Metabolites" ] = data
684+ self ._metabolite_header = min_header
671685 elif token .key .startswith ("EXTENDED_" ):
672686 section ["Extended" ] = data
687+ self ._extended_metabolite_header = min_header
673688 else :
674689 section ["Data" ] = data
690+ self ._samples = min_header
691+ if "BINNED_DATA" in section_name :
692+ self ._binned_header = min_header
675693
676694 elif token .key .endswith ("_RESULTS_FILE" ):
677695 key , results_file_dict = token
@@ -716,8 +734,11 @@ def print_file(self, f=sys.stdout, file_format="mwtab"):
716734 print ("#NMR" , file = f )
717735 else :
718736 print ("#{}" .format (key ), file = f )
719-
720- self .print_block (key , f = f , file_format = file_format )
737+
738+ if isinstance (self [key ], dict ):
739+ self .print_block (key , f = f , file_format = file_format )
740+ else :
741+ raise TypeError (f'Key/section "{ key } " is not a dictionary. It cannot be translated to the mwTab format.' )
721742 print ("#END" , file = f )
722743
723744 elif file_format == "json" :
0 commit comments