@@ -70,7 +70,6 @@ def parse(self):
7070
7171 comment = self .xmlnode_nodecontents ('comment' )
7272
73- volume = ''
7473 journal = self .xmlnode_nodecontents ('source' )
7574 if journal :
7675 journal = self .re_match_amp .sub ('&' , journal )
@@ -83,22 +82,21 @@ def parse(self):
8382 match = self .re_ASSL .search (refstr )
8483 if match :
8584 journal = 'ASSL'
86- volume = match .group (1 )
85+ volume = match .group (1 ) or match . group ( 2 ) or ''
8786 if not journal :
8887 journal = self .xmlnode_nodecontents ('conf-name' )
8988 if not journal :
9089 # see if it is thesis
9190 if self .re_thesis .search (refstr ):
9291 journal = 'Thesis'
9392
94- if not volume :
95- volume = self .xmlnode_nodecontents ('volume' ).lower ().replace ('vol' , '' ).strip ()
9693
94+ volume = self .xmlnode_nodecontents ('volume' )
9795 pages = self .xmlnode_nodecontents ('fpage' )
9896 series = self .xmlnode_nodecontents ('series' )
9997
100- type = self .xmlnode_attribute ('nlm-citation' , 'citation-type' ) or self .xmlnode_attribute ('citation' , 'citation-type' )
101- if comment and type in ['journal' , 'confproc' ] and not volume and not pages :
98+ cittype = self .xmlnode_attribute ('nlm-citation' , 'citation-type' ) or self .xmlnode_attribute ('citation' , 'citation-type' ) or self . xmlnode_attribute ( 'mixed-citation' , 'publication -type' )
99+ if comment and cittype in ['journal' , 'confproc' ] and not volume and not pages :
102100 try :
103101 volume , pages = comment .split ()
104102 except :
@@ -107,7 +105,7 @@ def parse(self):
107105 # these fields are already formatted the way we expect them
108106 self ['authors' ] = authors
109107 self ['year' ] = year
110- self ['jrlstr' ] = journal . replace ( 'amp' , '&' )
108+ self ['jrlstr' ] = journal
111109 self ['ttlstr' ] = title
112110 self ['volume' ] = self .parse_volume (volume )
113111 self ['page' ], self ['qualifier' ] = self .parse_pages (pages , letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" )
@@ -117,15 +115,6 @@ def parse(self):
117115 doi = self .parse_doi (refstr , comment )
118116 eprint = self .parse_eprint (refstr )
119117
120- # these fields are already formatted the way we expect them
121- self ['authors' ] = authors
122- self ['year' ] = year
123- self ['jrlstr' ] = journal
124- self ['ttlstr' ] = title
125- self ['volume' ] = volume
126- self ['page' ], self ['qualifier' ] = self .parse_pages (pages )
127- self ['pages' ] = self .combine_page_qualifier (self ['page' ], self ['qualifier' ])
128-
129118 if doi :
130119 self ['doi' ] = doi
131120 if eprint :
@@ -310,7 +299,7 @@ def process_and_dispatch(self) -> List[Dict[str, List[Dict[str, str]]]]:
310299 logger .error ("OUPFTxml: error parsing reference: %s" % error_desc )
311300
312301 references .append ({'bibcode' : bibcode , 'references' : parsed_references })
313- logger .debug ("%s: parsed %d references" % (bibcode , len (references )))
302+ logger .debug ("%s: parsed %d references out of %d found references " % (bibcode , len (parsed_references ), len ( block_references )))
314303
315304 return references
316305
@@ -319,8 +308,8 @@ def process_and_dispatch(self) -> List[Dict[str, List[Dict[str, str]]]]:
319308# It allows parsing references from either a file or a buffer, and if no input is provided,
320309# it runs a source test file to verify the functionality against expected parsed results.
321310# The test results are printed to indicate whether the parsing is successful or not.
322- from adsrefpipe .tests .unittests .stubdata import parsed_references
323311if __name__ == '__main__' : # pragma: no cover
312+ from adsrefpipe .tests .unittests .stubdata import parsed_references
324313 parser = argparse .ArgumentParser (description = 'Parse OUPFT references' )
325314 parser .add_argument ('-f' , '--filename' , help = 'the path to source file' )
326315 parser .add_argument ('-b' , '--buffer' , help = 'xml reference(s)' )
0 commit comments