@@ -63,9 +63,14 @@ def uir_parser(tokens, **kwargs):
6363 return [(tokens [0 ], tokens [1 ], float (tokens [2 ]))]
6464
6565
def tup_parser(tokens, **kwargs):
    """Parse one line whose trailing fields are separator-joined tuples.

    Parameters
    ----------
    tokens: list of str
        Fields of a single input line. The first two fields are kept
        unchanged; every remaining field is split into a tuple of strings.

    **kwargs:
        `tup_sep` is the separator handed to `str.split` for each trailing
        field. When it is not supplied, `None` is used and `str.split`
        falls back to splitting on runs of whitespace. Any other keyword
        is ignored.

    Returns
    -------
    list
        A one-element list holding `(tokens[0], tokens[1], tuples)`, where
        `tuples` is the list of split trailing fields (empty when the line
        has only two fields).

    """
    # Look the separator up once instead of once per trailing field.
    tup_sep = kwargs.get('tup_sep')
    return [(tokens[0], tokens[1],
             [tuple(field.split(tup_sep)) for field in tokens[2:]])]
68+
69+
# Built-in line parsers, looked up by the `fmt` name passed to `read`.
PARSERS = {
    'UI': ui_parser,
    'UIR': uir_parser,
    'UITup': tup_parser,
}
7075
7176
@@ -139,7 +144,7 @@ def binarize(t): t = list(t); t[2] = 1; return tuple(t)
139144
140145 return tuples
141146
142- def read (self , fpath , fmt = 'UIR' , sep = '\t ' , skip_lines = 0 , id_inline = False , parser = None ):
147+ def read (self , fpath , fmt = 'UIR' , sep = '\t ' , skip_lines = 0 , id_inline = False , parser = None , ** kwargs ):
143148 """Read data and parse line by line based on provided `fmt` or `parser`.
144149
145150 Parameters
@@ -172,12 +177,12 @@ def read(self, fpath, fmt='UIR', sep='\t', skip_lines=0, id_inline=False, parser
172177 depends on `parser` or `fmt`.
173178
174179 """
175- parser = PARSERS .get (fmt . upper () , None ) if parser is None else parser
180+ parser = PARSERS .get (fmt , None ) if parser is None else parser
176181 if parser is None :
177182 raise ValueError ('Invalid line format: {}\n '
178183 'Only support: {}' .format (fmt , PARSERS .keys ()))
179184 with open (fpath , encoding = self .encoding , errors = self .errors ) as f :
180185 tuples = [tup
181186 for idx , line in enumerate (itertools .islice (f , skip_lines , None ))
182- for tup in parser (line .strip ().split (sep ), line_idx = idx , id_inline = id_inline )]
187+ for tup in parser (line .strip ().split (sep ), line_idx = idx , id_inline = id_inline , ** kwargs )]
183188 return self .filter (tuples )
0 commit comments