Skip to content

Commit f2f8fe8

Browse files
author
Alan Christie
committed
- TypedColumnReader now supports STDIN
1 parent adec947 commit f2f8fe8

File tree

2 files changed

+30
-24
lines changed

2 files changed

+30
-24
lines changed

src/python/pipelines_utils/TypedColumnReader.py

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
"""
2626

2727
import csv
28-
import gzip
2928

3029

3130
class Error(Exception):
@@ -118,13 +117,15 @@ class TypedColumnReader(object):
118117
"smiles,comment:string,hac:int,ratio:float"
119118
"""
120119

121-
def __init__(self, filename,
120+
def __init__(self, csv_file,
122121
column_sep='\t',
123122
type_sep=':',
124123
header=None):
125124
"""Basic initialiser.
126125
127-
:param filename: The typed CSV file name
126+
:param csv_file: The typed CSV file. csv_file can be any object which
127+
supports the iterator protocol and returns a string
128+
each time its next() method is called
128129
:param column_sep: The file column separator
129130
:param type_sep: The type separator
130131
:param header: An optional header. If provided, the file must not have
@@ -138,15 +139,10 @@ def __init__(self, filename,
138139
encouraged to add a header line to all new files.
139140
"""
140141

141-
self._filename = filename
142+
self._csv_file = csv_file
142143
self._type_sep = type_sep
143144
self._header = header
144145

145-
# Open the CSV file (which may be compressed)
146-
if filename.endswith('.gz'):
147-
self._csv_file = gzip.open(filename, 'rt')
148-
else:
149-
self._csv_file = open(filename, 'rt')
150146
self._c_reader = csv.reader(self._csv_file,
151147
delimiter=column_sep,
152148
skipinitialspace=True,
@@ -242,10 +238,3 @@ def _handle_hdr(self, hdr):
242238
self._converters.append([name, CONVERTERS[column_type]])
243239
self._column_names.append(name)
244240
column_number += 1
245-
246-
def __del__(self):
247-
"""Delete method.
248-
"""
249-
if self._csv_file:
250-
self._csv_file.close()
251-
self._csv_file = None

src/python/test/python2_3/pipelines_utils/test_TypedColumnReader.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import gzip
12
import os
23
import unittest
34

@@ -12,7 +13,8 @@ def test_basic_example_a(self):
1213
"""Test loading of a simple CSV file
1314
"""
1415
test_file = os.path.join(DATA_DIR, 'TypedCsvReader.example.a.csv')
15-
test_file = TypedColumnReader.TypedColumnReader(test_file, column_sep=',')
16+
csv_file = open(test_file)
17+
test_file = TypedColumnReader.TypedColumnReader(csv_file, column_sep=',')
1618
num_lines = 0
1719
first_row = {}
1820
for row in test_file:
@@ -22,34 +24,40 @@ def test_basic_example_a(self):
2224
self.assertEqual(2, num_lines)
2325
self.assertEqual('A string', first_row['one'])
2426
self.assertEqual('and finally', first_row['four'])
27+
csv_file.close()
2528

2629
def test_basic_example_a_with_supplied_header(self):
2730
"""Test loading of a simple CSV file with a provided header
2831
"""
2932
test_file = os.path.join(DATA_DIR, 'TypedCsvReader.example.a-no-header.csv')
30-
test_file = TypedColumnReader.TypedColumnReader(test_file,
33+
csv_file = open(test_file)
34+
test_file = TypedColumnReader.TypedColumnReader(csv_file,
3135
column_sep=',',
3236
header='one,two:int,three:float,four:string')
3337
num_lines = 0
3438
for _ in test_file:
3539
num_lines += 1
3640
self.assertEqual(2, num_lines)
41+
csv_file.close()
3742

3843
def test_basic_example_a_gzip(self):
3944
"""Test loading of a simple CSV file (gzipped)
4045
"""
4146
test_file = os.path.join(DATA_DIR, 'TypedCsvReader.example.a.csv.gz')
42-
test_file = TypedColumnReader.TypedColumnReader(test_file, column_sep=',')
47+
csv_file = gzip.open(test_file, 'rt')
48+
test_file = TypedColumnReader.TypedColumnReader(csv_file, column_sep=',')
4349
num_lines = 0
4450
for _ in test_file:
4551
num_lines += 1
4652
self.assertEqual(2, num_lines)
53+
csv_file.close()
4754

4855
def test_basic_example_b_unknown_type(self):
4956
"""Test loading of a simple CSV file with a column type that is unknown
5057
"""
5158
test_file = os.path.join(DATA_DIR, 'TypedCsvReader.example.b.csv')
52-
test_file = TypedColumnReader.TypedColumnReader(test_file, column_sep=',')
59+
csv_file = open(test_file)
60+
test_file = TypedColumnReader.TypedColumnReader(csv_file, column_sep=',')
5361
num_lines = 0
5462
got_exception = False
5563
try:
@@ -61,12 +69,14 @@ def test_basic_example_b_unknown_type(self):
6169
got_exception = True
6270
self.assertTrue(got_exception)
6371
self.assertEqual(0, num_lines)
72+
csv_file.close()
6473

6574
def test_basic_example_c_too_many_colons(self):
6675
"""Test loading of a simple CSV file with a column that has too many colons
6776
"""
6877
test_file = os.path.join(DATA_DIR, 'TypedCsvReader.example.c.csv')
69-
test_file = TypedColumnReader.TypedColumnReader(test_file, column_sep=',')
78+
csv_file = open(test_file)
79+
test_file = TypedColumnReader.TypedColumnReader(csv_file, column_sep=',')
7080
num_lines = 0
7181
got_exception = False
7282
try:
@@ -79,12 +89,14 @@ def test_basic_example_c_too_many_colons(self):
7989
got_exception = True
8090
self.assertTrue(got_exception)
8191
self.assertEqual(0, num_lines)
92+
csv_file.close()
8293

8394
def test_basic_example_d_wrong_type(self):
8495
"""Test loading of a simple CSV file with a column that has a string as an int
8596
"""
8697
test_file = os.path.join(DATA_DIR, 'TypedCsvReader.example.d.csv')
87-
test_file = TypedColumnReader.TypedColumnReader(test_file, column_sep=',')
98+
csv_file = open(test_file)
99+
test_file = TypedColumnReader.TypedColumnReader(csv_file, column_sep=',')
88100
num_lines = 0
89101
got_exception = False
90102
try:
@@ -98,22 +110,26 @@ def test_basic_example_d_wrong_type(self):
98110
got_exception = True
99111
self.assertTrue(got_exception)
100112
self.assertEqual(0, num_lines)
113+
csv_file.close()
101114

102115
def test_basic_example_d_tabs(self):
103116
"""Test loading of a simple CSV file with tab (default) separators
104117
"""
105118
test_file = os.path.join(DATA_DIR, 'TypedCsvReader.example.e.csv')
106-
test_file = TypedColumnReader.TypedColumnReader(test_file)
119+
csv_file = open(test_file)
120+
test_file = TypedColumnReader.TypedColumnReader(csv_file)
107121
num_lines = 0
108122
for _ in test_file:
109123
num_lines += 1
110124
self.assertEqual(2, num_lines)
125+
csv_file.close()
111126

112127
def test_basic_example_d_too_many_values(self):
113128
"""Test loading of a simple CSV file with too many values
114129
"""
115130
test_file = os.path.join(DATA_DIR, 'TypedCsvReader.example.f.csv')
116-
test_file = TypedColumnReader.TypedColumnReader(test_file, column_sep=',')
131+
csv_file = open(test_file)
132+
test_file = TypedColumnReader.TypedColumnReader(csv_file, column_sep=',')
117133
num_lines = 0
118134
got_exception = False
119135
try:
@@ -126,3 +142,4 @@ def test_basic_example_d_too_many_values(self):
126142
got_exception = True
127143
self.assertTrue(got_exception)
128144
self.assertEqual(0, num_lines)
145+
csv_file.close()

0 commit comments

Comments
 (0)