Skip to content

Commit 41c717c

Browse files
author
Alan Christie
committed
- Now returns a dictionary of named type-converted values
1 parent a4a7123 commit 41c717c

File tree

2 files changed

+20
-8
lines changed

2 files changed

+20
-8
lines changed

src/python/pipelines_utils/TypedColumnReader.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -151,13 +151,15 @@ def __init__(self, filename,
151151
# An entry for each column in the file and compiled by _handle_header
152152
# using the provided header or file content on the first iteration.
153153
self._converters = []
154+
# The column names extracted from the header
155+
self._column_names = []
154156

155157
def __iter__(self):
156-
"""Return the next typ-converted row from the file.
158+
"""Return the next type-converted row from the file.
157159
The first row is expected to be a header with optional
158160
type definitions.
159161
160-
:returns: A list of type-converted values for the next row
162+
:returns: A dictionary of type-converted values for the next row
161163
162164
:raises: ValueError if a column value cannot be converted
163165
:raises: ContentError if the column value is unknown or does not
@@ -183,10 +185,10 @@ def __iter__(self):
183185
if len(self._converters) == 0:
184186
raise ContentError(1, 1, None, 'Missing header')
185187

186-
# Construct a list of row column values,
188+
# Construct a dictionary of row column names and values,
187189
# applying type conversions based on the
188190
# type defined in the header....
189-
row_values = []
191+
row_content = {}
190192
col_index = 0
191193
# Convert...
192194
for col in row:
@@ -195,14 +197,15 @@ def __iter__(self):
195197
raise ContentError(col_index + 1, self._c_reader.line_num,
196198
None, 'Too many values')
197199
try:
198-
row_values.append(self._converters[col_index][1](col))
200+
row_content[self._column_names[col_index]] =\
201+
self._converters[col_index][1](col)
199202
except ValueError:
200203
raise ContentError(col_index + 1, self._c_reader.line_num,
201204
col,
202205
'Does not comply with column type')
203206
col_index += 1
204207

205-
yield row_values
208+
yield row_content
206209

207210
def _handle_hdr(self, hdr):
208211
"""Given the file header line (or one provided when the class
@@ -218,7 +221,10 @@ def _handle_hdr(self, hdr):
218221
if len(cell_parts) not in [1, 2]:
219222
raise ContentError(column_number, self._c_reader.line_num,
220223
cell, 'Expected name and type (up to 2 items)')
221-
name = cell_parts[0]
224+
name = cell_parts[0].strip()
225+
if len(name) == 0:
226+
raise ContentError(column_number, self._c_reader.line_num,
227+
cell, 'Column name is empty')
222228
if len(cell_parts) == 2:
223229
column_type = cell_parts[1].lower()
224230
if column_type not in CONVERTERS:
@@ -227,6 +233,7 @@ def _handle_hdr(self, hdr):
227233
# Unspecified - assume built-in 'string'
228234
column_type = 'string'
229235
self._converters.append([name, CONVERTERS[column_type]])
236+
self._column_names.append(name)
230237
column_number += 1
231238

232239
def __del__(self):

src/python/test/python2_3/pipelines_utils/test_TypedColumnReader.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,14 @@ def test_basic_example_a(self):
1414
test_file = os.path.join(DATA_DIR, 'TypedCsvReader.example.a.csv')
1515
test_file = TypedColumnReader.TypedColumnReader(test_file, column_sep=',')
1616
num_lines = 0
17-
for _ in test_file:
17+
first_row = {}
18+
for row in test_file:
19+
if num_lines == 0:
20+
first_row = row
1821
num_lines += 1
1922
self.assertEqual(2, num_lines)
23+
self.assertEqual('A string', first_row['one'])
24+
self.assertEqual('and finally', first_row['four'])
2025

2126
def test_basic_example_a_with_supplied_header(self):
2227
"""Test loading of a simple CSV file with a provided header

0 commit comments

Comments
 (0)