Skip to content

Commit 58103cb

Browse files
lthoangtqtg
authored and committed
Add UITup data format to Reader (#212)
1 parent 413f068 commit 58103cb

File tree

2 files changed

+16
-4
lines changed

2 files changed

+16
-4
lines changed

cornac/data/reader.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,14 @@ def uir_parser(tokens, **kwargs):
6363
return [(tokens[0], tokens[1], float(tokens[2]))]
6464

6565

66+
def tup_parser(tokens, **kwargs):
    """Parse one tokenized line in the `UITup` format.

    Parameters
    ----------
    tokens: list of str
        Fields of a single line. The first two are passed through unchanged
        (presumably the user and item ids, going by the `UITup` format name);
        every remaining field is split into a tuple.

    **kwargs:
        `tup_sep` (optional) is the delimiter used to split each remaining
        field. When it is not given, `str.split(None)` is used, so the field
        is split on runs of whitespace. Other keyword arguments are ignored.

    Returns
    -------
    list
        A single-element list `[(tokens[0], tokens[1], [tuple, ...])]`.
    """
    # Look the separator up once instead of once per field.
    tup_sep = kwargs.get('tup_sep')
    return [(tokens[0], tokens[1], [tuple(tup.split(tup_sep)) for tup in tokens[2:]])]
68+
69+
6670
PARSERS = {
6771
'UI': ui_parser,
68-
'UIR': uir_parser
72+
'UIR': uir_parser,
73+
'UITup': tup_parser,
6974
}
7075

7176

@@ -139,7 +144,7 @@ def binarize(t): t = list(t); t[2] = 1; return tuple(t)
139144

140145
return tuples
141146

142-
def read(self, fpath, fmt='UIR', sep='\t', skip_lines=0, id_inline=False, parser=None):
147+
def read(self, fpath, fmt='UIR', sep='\t', skip_lines=0, id_inline=False, parser=None, **kwargs):
143148
"""Read data and parse line by line based on provided `fmt` or `parser`.
144149
145150
Parameters
@@ -172,12 +177,12 @@ def read(self, fpath, fmt='UIR', sep='\t', skip_lines=0, id_inline=False, parser
172177
depends on `parser` or `fmt`.
173178
174179
"""
175-
parser = PARSERS.get(fmt.upper(), None) if parser is None else parser
180+
parser = PARSERS.get(fmt, None) if parser is None else parser
176181
if parser is None:
177182
raise ValueError('Invalid line format: {}\n'
178183
'Only support: {}'.format(fmt, PARSERS.keys()))
179184
with open(fpath, encoding=self.encoding, errors=self.errors) as f:
180185
tuples = [tup
181186
for idx, line in enumerate(itertools.islice(f, skip_lines, None))
182-
for tup in parser(line.strip().split(sep), line_idx=idx, id_inline=id_inline)]
187+
for tup in parser(line.strip().split(sep), line_idx=idx, id_inline=id_inline, **kwargs)]
183188
return self.filter(tuples)

tests/cornac/data/test_reader.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@ def test_read_uir(self):
4848
self.assertEqual(triplet_data[6][1], '478')
4949
self.assertEqual(triplet_data[8][0], '543')
5050

51+
def test_read_tup(self):
    """Read the shared data file with `fmt='UITup'` and spot-check rows."""
    tup_data = self.reader.read(self.data_file, fmt='UITup')
    self.assertEqual(len(tup_data), 10)
    # No `tup_sep` is passed here; the expected trailing fields are 1-tuples.
    self.assertEqual(tup_data[4][2], [('3',), ('891656347',)])
    self.assertEqual(tup_data[6][1], '478')
    self.assertEqual(tup_data[8][0], '543')
57+
5158
def test_filter(self):
5259
reader = Reader(bin_threshold=4.0)
5360
data = reader.read(self.data_file)

0 commit comments

Comments
 (0)